List of usage examples for weka.classifiers.functions.SMO
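Before the examples, a minimal, self-contained sketch of the pattern they all share: load an ARFF file, set the class attribute, then train and evaluate an SMO classifier. The file name is a placeholder; the calls shown (Instances, SMO.buildClassifier, Evaluation.crossValidateModel) are the standard Weka API used throughout the examples below.

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.functions.SMO;
import weka.core.Instances;

public class SmoMinimalExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "data.arff" is a placeholder path.
        Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));
        data.setClassIndex(data.numAttributes() - 1); // last attribute is the class

        SMO smo = new SMO(); // defaults: linear PolyKernel, C = 1
        smo.buildClassifier(data);

        // Estimate performance with 10-fold cross-validation.
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new SMO(), data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
    }
}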
From source file:focusedCrawler.target.EMClassifier.java
License:Open Source License
private String buildClassifier(String suffix) throws Exception {
    String trainingData = rootDir + File.separator + "trainData_" + suffix;
    // System.out.println("TRAIN:" + trainingData);
    String trainWekafile = wekaFileDir + "weka_" + suffix;
    String testFileDir = rootDir + File.separator + "testData_" + suffix;
    String outputModel = rootDir + File.separator + "model" + File.separator + "model_" + suffix;
    CreateWekaInput createWekaFile = new CreateWekaInput(new File(trainingData), new File(testFileDir), stoplist);
    attributes = createWekaFile.centroid2Weka(trainWekafile);
    double max = Double.MIN_VALUE;
    double cValue = 0;
    int count = 0;
    // Search over the SMO complexity constant C. Note that as written the loop
    // body runs exactly once (count < 1), so only C = 0.0625 is actually tried
    // and the c == 1 branch below is never reached.
    for (double c = 0.0625; count < 1; c = c * 0.5) {
        SMO classifier = new SMO();
        String[] argum = new String[] { "-t", trainWekafile, "-C", "" + c, "-v", "-M", "-d", outputModel + c };
        String output = Evaluation.evaluateModel(classifier, argum);
        // Scrape the accuracy figure out of Weka's textual evaluation report.
        int index = output.indexOf("Correctly Classified Instances");
        if (index >= 0) {
            int end = output.indexOf("%", index);
            String line = output.substring(index, end).trim();
            line = line.substring(line.lastIndexOf(" "));
            double accuracy = Double.parseDouble(line.trim());
            System.out.println("C=" + c + " acc=" + accuracy);
            if (accuracy > max) {
                max = accuracy;
                cValue = c;
            }
        }
        count++;
        if (c == 1) {
            testClassifier(trainWekafile + "_test", outputModel + c);
        }
    }
    return outputModel + cValue;
}
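The method above recovers accuracy by scraping Weka's textual evaluation report, which is fragile. A sketch of a less brittle variant, assuming the training data is already loaded as an Instances object named dataTrain, reads the same figure from the Evaluation object directly:

// Sketch: obtain accuracy programmatically instead of parsing report text.
SMO classifier = new SMO();
classifier.setC(0.0625); // same complexity constant as the "-C" option above
Evaluation eval = new Evaluation(dataTrain);
eval.crossValidateModel(classifier, dataTrain, 10, new java.util.Random(1));
double accuracy = eval.pctCorrect(); // "Correctly Classified Instances" as a percentage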
From source file:focusedCrawler.target.EMClassifier.java
License:Open Source License
private void testClassifier(String testFile, String outputModel) throws Exception {
    SMO classifier = new SMO();
    // NaiveBayes classifier = new NaiveBayes();
    // System.out.println("java -T " + testFile + " -l " + outputModel);
    String[] argum = new String[] { "-T", testFile, "-l", outputModel, "-i" };
    String output = Evaluation.evaluateModel(classifier, argum);
    // Tokenize the report line that follows the "F-Measure" header to pull
    // out precision, recall, and F-measure.
    int index = output.indexOf("F-Measure");
    if (index >= 0) {
        index = output.indexOf("\n", index);
        int end = output.indexOf("\n", index + 1);
        String line = output.substring(index, end).trim();
        StringTokenizer tokenizer = new StringTokenizer(line, " ");
        int count = 0;
        while (tokenizer.hasMoreTokens()) {
            String word = tokenizer.nextToken();
            if (count == 2) {
                System.out.println("PRECISION:" + word);
            }
            if (count == 3) {
                System.out.println("RECALL:" + word);
            }
            if (count == 4) {
                System.out.println("F-MEASURE:" + word);
            }
            count++;
        }
    }
    System.out.println("-----------");
}
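Tokenizing report lines is similarly brittle; the per-class measures can be read from the Evaluation object instead. A sketch, assuming dataTest holds the test Instances and classifier is the trained model:

// Sketch: per-class measures without tokenizing the report.
Evaluation eval = new Evaluation(dataTest);
eval.evaluateModel(classifier, dataTest);
int positiveClass = 0; // index of the class of interest (assumption)
System.out.println("PRECISION:" + eval.precision(positiveClass));
System.out.println("RECALL:" + eval.recall(positiveClass));
System.out.println("F-MEASURE:" + eval.fMeasure(positiveClass));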
From source file:focusedCrawler.target.PEBL.java
License:Open Source License
private String buildClassifier(String suffix) throws Exception {
    String trainingData = rootDir + File.separator + "trainData_" + suffix;
    // System.out.println("TRAIN:" + trainingData);
    String trainWekafile = wekaFileDir + "weka_" + suffix;
    String testFileDir = rootDir + File.separator + "testData_" + suffix;
    String outputModel = rootDir + File.separator + "model" + File.separator + "model_" + suffix;
    CreateTCWekaInput createWekaFile = new CreateTCWekaInput(new File(trainingData), new File(testFileDir), stoplist);
    attributes = createWekaFile.centroid2Weka(trainWekafile);
    double max = Double.MIN_VALUE;
    double cValue = 0;
    int count = 0;
    // Search over the SMO complexity constant C. As in EMClassifier above, the
    // loop body runs exactly once (count < 1), so only C = 0.0625 is tried and
    // the c == 1 branch is never reached.
    for (double c = 0.0625; count < 1; c = c * 0.5) {
        SMO classifier = new SMO();
        String[] argum = new String[] { "-t", trainWekafile, "-C", "" + c, "-v", "-d", outputModel + c };
        String output = Evaluation.evaluateModel(classifier, argum);
        // Scrape the accuracy figure out of Weka's textual evaluation report.
        int index = output.indexOf("Correctly Classified Instances");
        if (index >= 0) {
            int end = output.indexOf("%", index);
            String line = output.substring(index, end).trim();
            line = line.substring(line.lastIndexOf(" "));
            double accuracy = Double.parseDouble(line.trim());
            System.out.println("C=" + c + " acc=" + accuracy);
            if (accuracy > max) {
                max = accuracy;
                cValue = c;
            }
        }
        count++;
        if (c == 1) {
            testClassifier(trainWekafile + "_test", outputModel + c);
        }
    }
    return outputModel + cValue;
}
From source file:fr.unice.i3s.rockflows.experiments.main.IntermediateExecutor.java
private List<InfoClassifier> inputClassifier(Dataset original) throws Exception {
    List<InfoClassifier> cls = new ArrayList<>();
    int id = 0;
    // Logistic Regression:
    InfoClassifier ic1 = new InfoClassifier(id++);
    ic1.classifier = new Logistic();
    ic1.name = "Logistic Regression";
    ic1.properties.requireNumericDataset = true;
    cls.add(ic1);
    // SVM:
    InfoClassifier ic2 = new InfoClassifier(id++);
    LibSVM ccc = new LibSVM();
    ccc.setOptions(new String[] {
            "-J", // turn off nominal-to-binary conversion
            "-V"  // turn off missing-value replacement
    });
    // ccc.setSVMType(new SelectedTag(LibSVM.SVMTYPE_C_SVC, LibSVM.TAGS_SVMTYPE));
    // ccc.setKernelType(new SelectedTag(LibSVM.KERNELTYPE_RBF, LibSVM.TAGS_KERNELTYPE));
    // ccc.setEps(0.001); // tolerance
    ic2.classifier = ccc;
    ic2.name = "Svm";
    ic2.properties.requireNumericDataset = true;
    cls.add(ic2);
    // J48:
    InfoClassifier ic3 = new InfoClassifier(id++);
    ic3.classifier = new J48();
    ic3.name = "J48";
    ic3.properties.manageMissingValues = true;
    cls.add(ic3);
    // NBTree:
    InfoClassifier ic4 = new InfoClassifier(id++);
    ic4.classifier = new NBTree();
    ic4.name = "NBTree";
    ic4.properties.manageMissingValues = true;
    cls.add(ic4);
    // RandomForest:
    InfoClassifier ic5 = new InfoClassifier(id++);
    RandomForest ccc2 = new RandomForest();
    ccc2.setNumTrees(500);
    ccc2.setMaxDepth(0); // unlimited depth
    ic5.classifier = ccc2;
    ic5.name = "Random Forest";
    ic5.properties.manageMissingValues = true;
    cls.add(ic5);
    // Logistic Model Trees (LMT):
    InfoClassifier ic6 = new InfoClassifier(id++);
    ic6.classifier = new LMT();
    ic6.name = "Logistic Model Tree";
    ic6.properties.manageMissingValues = true;
    cls.add(ic6);
    // Alternating Decision Trees (ADTree):
    InfoClassifier ic7 = new InfoClassifier(id++);
    if (original.trainingSet.numClasses() > 2) {
        MultiClassClassifier mc = new MultiClassClassifier();
        mc.setOptions(new String[] { "-M", "3" }); // 1-vs-1
        mc.setClassifier(new ADTree());
        ic7.classifier = mc;
        ic7.name = "1-vs-1 Alternating Decision Tree";
    } else {
        ic7.classifier = new ADTree();
        ic7.name = "Alternating Decision Tree";
    }
    ic7.properties.manageMultiClass = false;
    ic7.properties.manageMissingValues = true;
    cls.add(ic7);
    // Naive Bayes:
    InfoClassifier ic8 = new InfoClassifier(id++);
    ic8.classifier = new NaiveBayes();
    ic8.name = "Naive Bayes";
    ic8.properties.manageMissingValues = true;
    cls.add(ic8);
    // Bayesian Networks:
    /* All Bayes network algorithms implemented in Weka assume the following
       about the data set: all variables are discrete finite variables (a data
       set with continuous variables can be discretized with
       weka.filters.unsupervised.attribute.Discretize), and no instances have
       missing values (missing values are filled in with
       weka.filters.unsupervised.attribute.ReplaceMissingValues). The first
       step performed by buildClassifier is checking whether the data set
       fulfills those assumptions; if not, the data set is automatically
       filtered and a warning is written to STDERR. */
    InfoClassifier ic9 = new InfoClassifier(id++);
    ic9.classifier = new BayesNet();
    ic9.name = "Bayesian Network";
    ic9.properties.requireNominalDataset = true;
    cls.add(ic9);
    // IBk:
    InfoClassifier ic10 = new InfoClassifier(id++);
    ic10.classifier = new IBk();
    ic10.name = "IBk";
    ic10.properties.manageMissingValues = true;
    cls.add(ic10);
    // JRip:
    InfoClassifier ic11 = new InfoClassifier(id++);
    ic11.classifier = new JRip();
    ic11.name = "JRip";
    ic11.properties.manageMissingValues = true;
    cls.add(ic11);
    // MultilayerPerceptron (MLP):
    InfoClassifier ic12 = new InfoClassifier(id++);
    ic12.classifier = new MultilayerPerceptron();
    ic12.name = "Multilayer Perceptron";
    ic12.properties.requireNumericDataset = true;
    cls.add(ic12);
    // Bagging REPTree:
    InfoClassifier ic14 = new InfoClassifier(id++);
    REPTree base3 = new REPTree();
    Bagging ccc4 = new Bagging();
    ccc4.setClassifier(base3);
    ic14.classifier = ccc4;
    ic14.name = "Bagging RepTree";
    ic14.properties.manageMissingValues = true;
    cls.add(ic14);
    // Bagging J48:
    InfoClassifier ic15 = new InfoClassifier(id++);
    Bagging ccc5 = new Bagging();
    ccc5.setClassifier(new J48());
    ic15.classifier = ccc5;
    ic15.name = "Bagging J48";
    ic15.properties.manageMissingValues = true;
    cls.add(ic15);
    // Bagging NBTree:
    InfoClassifier ic16 = new InfoClassifier(id++);
    Bagging ccc6 = new Bagging();
    ccc6.setClassifier(new NBTree());
    ic16.classifier = ccc6;
    ic16.name = "Bagging NBTree";
    ic16.properties.manageMissingValues = true;
    cls.add(ic16);
    // Bagging OneR:
    InfoClassifier ic17 = new InfoClassifier(id++);
    Bagging ccc7 = new Bagging();
    ccc7.setClassifier(new OneR());
    ic17.classifier = ccc7;
    ic17.name = "Bagging OneR";
    ic17.properties.requireNominalDataset = true;
    ic17.properties.manageMissingValues = true;
    cls.add(ic17);
    // Bagging JRip:
    InfoClassifier ic18 = new InfoClassifier(id++);
    Bagging ccc8 = new Bagging();
    ccc8.setClassifier(new JRip());
    ic18.classifier = ccc8;
    ic18.name = "Bagging JRip";
    ic18.properties.manageMissingValues = true;
    cls.add(ic18);
    // MultiBoostAB DecisionStump:
    InfoClassifier ic24 = new InfoClassifier(id++);
    MultiBoostAB ccc14 = new MultiBoostAB();
    ccc14.setClassifier(new DecisionStump());
    ic24.classifier = ccc14;
    ic24.name = "MultiboostAB DecisionStump";
    ic24.properties.manageMissingValues = true;
    cls.add(ic24);
    // MultiBoostAB OneR:
    InfoClassifier ic25 = new InfoClassifier(id++);
    MultiBoostAB ccc15 = new MultiBoostAB();
    ccc15.setClassifier(new OneR());
    ic25.classifier = ccc15;
    ic25.name = "MultiboostAB OneR";
    ic25.properties.requireNominalDataset = true;
    cls.add(ic25);
    // MultiBoostAB J48:
    InfoClassifier ic27 = new InfoClassifier(id++);
    MultiBoostAB ccc17 = new MultiBoostAB();
    ccc17.setClassifier(new J48());
    ic27.classifier = ccc17;
    ic27.name = "MultiboostAB J48";
    ic27.properties.manageMissingValues = true;
    cls.add(ic27);
    // MultiBoostAB JRip:
    InfoClassifier ic28 = new InfoClassifier(id++);
    MultiBoostAB ccc18 = new MultiBoostAB();
    ccc18.setClassifier(new JRip());
    ic28.classifier = ccc18;
    ic28.name = "MultiboostAB JRip";
    cls.add(ic28);
    // MultiBoostAB NBTree:
    InfoClassifier ic29 = new InfoClassifier(id++);
    MultiBoostAB ccc19 = new MultiBoostAB();
    ccc19.setClassifier(new NBTree());
    ic29.classifier = ccc19;
    ic29.name = "MultiboostAB NBTree";
    ic29.properties.manageMissingValues = true;
    cls.add(ic29);
    // RotationForest RandomTree:
    InfoClassifier ic32 = new InfoClassifier(id++);
    RotationForest ccc21 = new RotationForest();
    RandomTree rtr5 = new RandomTree();
    rtr5.setMinNum(2);
    rtr5.setAllowUnclassifiedInstances(true);
    ccc21.setClassifier(rtr5);
    ic32.classifier = ccc21;
    ic32.name = "RotationForest RandomTree";
    ic32.properties.manageMissingValues = true;
    cls.add(ic32);
    // RotationForest J48:
    InfoClassifier ic33 = new InfoClassifier(id++);
    J48 base6 = new J48();
    RotationForest ccc22 = new RotationForest();
    ccc22.setClassifier(base6);
    ic33.classifier = ccc22;
    ic33.name = "RotationForest J48";
    ic33.properties.manageMissingValues = true;
    cls.add(ic33);
    // RandomCommittee RandomTree:
    InfoClassifier ic34 = new InfoClassifier(id++);
    RandomTree rtr4 = new RandomTree();
    rtr4.setMinNum(2);
    rtr4.setAllowUnclassifiedInstances(true);
    RandomCommittee ccc23 = new RandomCommittee();
    ccc23.setClassifier(rtr4);
    ic34.classifier = ccc23;
    ic34.name = "RandomCommittee RandomTree";
    ic34.properties.manageMissingValues = true;
    cls.add(ic34);
    // Classification via Clustering: SimpleKMeans
    // N.B.: it can't handle date attributes.
    InfoClassifier ic35 = new InfoClassifier(id++);
    ClassificationViaClustering ccc24 = new ClassificationViaClustering();
    SimpleKMeans km = new SimpleKMeans();
    km.setNumClusters(original.trainingSet.numClasses());
    ccc24.setClusterer(km);
    ic35.classifier = ccc24;
    ic35.name = "Classification via Clustering: KMeans";
    ic35.properties.requireNumericDataset = true;
    cls.add(ic35);
    // Classification via Clustering: FarthestFirst
    InfoClassifier ic36 = new InfoClassifier(id++);
    ClassificationViaClustering ccc25 = new ClassificationViaClustering();
    FarthestFirst ff = new FarthestFirst();
    ff.setNumClusters(original.trainingSet.numClasses());
    ccc25.setClusterer(ff);
    ic36.classifier = ccc25;
    ic36.name = "Classification via Clustering: FarthestFirst";
    ic36.properties.requireNumericDataset = true;
    cls.add(ic36);
    // SMO:
    InfoClassifier ic37 = new InfoClassifier(id++);
    ic37.classifier = new SMO();
    ic37.properties.requireNumericDataset = true;
    ic37.properties.manageMultiClass = false;
    ic37.name = "Smo";
    cls.add(ic37);
    // Random Subspace:
    InfoClassifier ic38 = new InfoClassifier(id++);
    RandomSubSpace sub = new RandomSubSpace();
    sub.setClassifier(new REPTree());
    ic38.classifier = sub;
    ic38.name = "Random Subspaces of RepTree";
    ic38.properties.manageMissingValues = true;
    cls.add(ic38);
    // PART (rule based):
    InfoClassifier ic39 = new InfoClassifier(id++);
    PART p39 = new PART();
    p39.setOptions(new String[] { "-C", "0.5" });
    ic39.classifier = p39; // fixed: the original assigned a fresh, unconfigured PART here
    ic39.name = "PART";
    ic39.properties.manageMissingValues = true;
    cls.add(ic39);
    // Decision Table / Naive Bayes hybrid:
    InfoClassifier ic40 = new InfoClassifier(id++);
    ic40.classifier = new DTNB();
    ic40.name = "DTNB";
    ic40.properties.manageMissingValues = true;
    cls.add(ic40);
    // Ridor (rule based):
    InfoClassifier ic41 = new InfoClassifier(id++);
    ic41.classifier = new Ridor();
    ic41.name = "Ridor";
    ic41.properties.manageMissingValues = true;
    cls.add(ic41);
    // Decision Table:
    InfoClassifier ic42 = new InfoClassifier(id++);
    ic42.classifier = new DecisionTable();
    ic42.name = "Decision Table";
    ic42.properties.manageMissingValues = true;
    cls.add(ic42);
    // Conjunctive Rule:
    InfoClassifier ic43 = new InfoClassifier(id++);
    ic43.classifier = new ConjunctiveRule();
    ic43.name = "Conjunctive Rule";
    ic43.properties.manageMissingValues = true;
    cls.add(ic43);
    // LogitBoost Decision Stump:
    InfoClassifier ic44 = new InfoClassifier(id++);
    LogitBoost lb = new LogitBoost();
    lb.setOptions(new String[] { "-L", "1.79" });
    lb.setClassifier(new DecisionStump());
    ic44.classifier = lb;
    ic44.name = "LogitBoost Decision Stump";
    ic44.properties.manageMissingValues = true;
    cls.add(ic44);
    // Raced Incremental Logit Boost, Decision Stump:
    InfoClassifier ic45 = new InfoClassifier(id++);
    RacedIncrementalLogitBoost rlb = new RacedIncrementalLogitBoost();
    rlb.setClassifier(new DecisionStump());
    ic45.classifier = rlb;
    ic45.name = "Raced Incremental Logit Boost, Decision Stumps";
    ic45.properties.manageMissingValues = true;
    cls.add(ic45);
    // AdaBoostM1 Decision Stump:
    InfoClassifier ic46 = new InfoClassifier(id++);
    AdaBoostM1 adm = new AdaBoostM1();
    adm.setClassifier(new DecisionStump());
    ic46.classifier = adm;
    ic46.name = "AdaboostM1, Decision Stumps";
    ic46.properties.manageMissingValues = true;
    cls.add(ic46);
    // AdaBoostM1 J48:
    InfoClassifier ic47 = new InfoClassifier(id++);
    AdaBoostM1 adm2 = new AdaBoostM1();
    adm2.setClassifier(new J48());
    ic47.classifier = adm2;
    ic47.name = "AdaboostM1, J48";
    ic47.properties.manageMissingValues = true;
    cls.add(ic47);
    // MultiBoostAB Decision Table:
    InfoClassifier ic48 = new InfoClassifier(id++);
    MultiBoostAB mba = new MultiBoostAB();
    mba.setClassifier(new DecisionTable());
    ic48.classifier = mba;
    ic48.name = "MultiboostAB, Decision Table";
    ic48.properties.manageMissingValues = true;
    cls.add(ic48);
    // MultiBoostAB Naive Bayes:
    InfoClassifier ic49 = new InfoClassifier(id++);
    MultiBoostAB mba2 = new MultiBoostAB();
    mba2.setClassifier(new NaiveBayes());
    ic49.classifier = mba2;
    ic49.name = "MultiboostAB, Naive Bayes";
    ic49.properties.manageMissingValues = true;
    cls.add(ic49);
    // MultiBoostAB PART:
    InfoClassifier ic50 = new InfoClassifier(id++);
    MultiBoostAB mba3 = new MultiBoostAB();
    mba3.setClassifier(new PART());
    ic50.classifier = mba3;
    ic50.name = "MultiboostAB, PART";
    ic50.properties.manageMissingValues = true;
    cls.add(ic50);
    // MultiBoostAB RandomTree:
    InfoClassifier ic51 = new InfoClassifier(id++);
    MultiBoostAB mba4 = new MultiBoostAB();
    RandomTree rtr3 = new RandomTree();
    rtr3.setMinNum(2);
    rtr3.setAllowUnclassifiedInstances(true);
    mba4.setClassifier(rtr3);
    ic51.classifier = mba4;
    ic51.name = "MultiboostAB, RandomTree";
    ic51.properties.manageMissingValues = true;
    cls.add(ic51);
    // MultiBoostAB REPTree:
    InfoClassifier ic52 = new InfoClassifier(id++);
    MultiBoostAB mba5 = new MultiBoostAB();
    mba5.setClassifier(new REPTree());
    ic52.classifier = mba5;
    ic52.name = "MultiboostAB, RepTree";
    ic52.properties.manageMissingValues = true;
    cls.add(ic52);
    // Bagging Decision Stump:
    InfoClassifier ic53 = new InfoClassifier(id++);
    Bagging bag = new Bagging();
    bag.setClassifier(new DecisionStump());
    ic53.classifier = bag;
    ic53.name = "Bagging Decision Stump";
    ic53.properties.manageMissingValues = true;
    cls.add(ic53);
    // Bagging Decision Table:
    InfoClassifier ic54 = new InfoClassifier(id++);
    Bagging bag1 = new Bagging();
    bag1.setClassifier(new DecisionTable());
    ic54.classifier = bag1;
    ic54.name = "Bagging Decision Table";
    ic54.properties.manageMissingValues = true;
    cls.add(ic54);
    // Bagging HyperPipes:
    InfoClassifier ic55 = new InfoClassifier(id++);
    Bagging bag2 = new Bagging();
    bag2.setClassifier(new HyperPipes());
    ic55.classifier = bag2;
    ic55.name = "Bagging Hyper Pipes";
    cls.add(ic55);
    // Bagging Naive Bayes:
    InfoClassifier ic56 = new InfoClassifier(id++);
    Bagging bag3 = new Bagging();
    bag3.setClassifier(new NaiveBayes());
    ic56.classifier = bag3;
    ic56.name = "Bagging Naive Bayes";
    ic56.properties.manageMissingValues = true;
    cls.add(ic56);
    // Bagging PART:
    InfoClassifier ic57 = new InfoClassifier(id++);
    Bagging bag4 = new Bagging();
    bag4.setClassifier(new PART());
    ic57.classifier = bag4;
    ic57.name = "Bagging PART";
    ic57.properties.manageMissingValues = true;
    cls.add(ic57);
    // Bagging RandomTree:
    InfoClassifier ic58 = new InfoClassifier(id++);
    Bagging bag5 = new Bagging();
    RandomTree rtr2 = new RandomTree();
    rtr2.setMinNum(2);
    rtr2.setAllowUnclassifiedInstances(true);
    bag5.setClassifier(rtr2);
    ic58.classifier = bag5;
    ic58.name = "Bagging RandomTree";
    ic58.properties.manageMissingValues = true;
    cls.add(ic58);
    // NNge:
    InfoClassifier ic59 = new InfoClassifier(id++);
    NNge nng = new NNge();
    nng.setNumFoldersMIOption(1);
    nng.setNumAttemptsOfGeneOption(5);
    ic59.classifier = nng;
    ic59.name = "NNge";
    cls.add(ic59);
    // OrdinalClassClassifier J48:
    InfoClassifier ic60 = new InfoClassifier(id++);
    OrdinalClassClassifier occ = new OrdinalClassClassifier();
    occ.setClassifier(new J48());
    ic60.classifier = occ;
    ic60.name = "OrdinalClassClassifier J48";
    ic60.properties.manageMissingValues = true;
    cls.add(ic60);
    // Hyper Pipes:
    InfoClassifier ic61 = new InfoClassifier(id++);
    ic61.classifier = new HyperPipes();
    ic61.name = "Hyper Pipes";
    cls.add(ic61);
    // Classification via Regression (M5P used by default):
    InfoClassifier ic62 = new InfoClassifier(id++);
    ic62.classifier = new ClassificationViaRegression();
    ic62.name = "Classification ViaRegression, M5P";
    ic62.properties.requireNumericDataset = true;
    cls.add(ic62);
    // RBF Network:
    InfoClassifier ic64 = new InfoClassifier(id++);
    RBFNetwork rbf = new RBFNetwork();
    rbf.setRidge(0.00000001); // 10^-8
    rbf.setNumClusters(original.trainingSet.numAttributes() / 2);
    ic64.classifier = rbf;
    ic64.name = "RBF Network";
    ic64.properties.requireNumericDataset = true;
    if (!original.properties.isStandardized) {
        ic64.properties.compatibleWithDataset = false;
    }
    cls.add(ic64);
    // RandomTree:
    InfoClassifier ic66 = new InfoClassifier(id++);
    RandomTree rtr = new RandomTree();
    rtr.setMinNum(2);
    rtr.setAllowUnclassifiedInstances(true);
    ic66.classifier = rtr;
    ic66.name = "Random Tree";
    ic66.properties.manageMissingValues = true;
    cls.add(ic66);
    // REPTree:
    InfoClassifier ic67 = new InfoClassifier(id++);
    REPTree rept = new REPTree();
    ic67.classifier = rept;
    ic67.name = "Rep Tree";
    ic67.properties.manageMissingValues = true;
    cls.add(ic67);
    // Decision Stump:
    InfoClassifier ic68 = new InfoClassifier(id++);
    ic68.classifier = new DecisionStump();
    ic68.name = "Decision Stump";
    ic68.properties.manageMissingValues = true;
    cls.add(ic68);
    // OneR:
    InfoClassifier ic69 = new InfoClassifier(id++);
    ic69.classifier = new OneR();
    ic69.name = "OneR";
    ic69.properties.requireNominalDataset = true;
    ic69.properties.manageMissingValues = true;
    cls.add(ic69);
    // LWL:
    InfoClassifier ic71 = new InfoClassifier(id++);
    ic71.classifier = new LWL();
    ic71.name = "LWL";
    ic71.properties.manageMissingValues = true;
    cls.add(ic71);
    // Bagging LWL:
    InfoClassifier ic72 = new InfoClassifier(id++);
    Bagging bg72 = new Bagging();
    bg72.setClassifier(new LWL());
    ic72.classifier = bg72;
    ic72.name = "Bagging LWL";
    ic72.properties.manageMissingValues = true;
    cls.add(ic72);
    // Decorate:
    InfoClassifier ic73 = new InfoClassifier(id++);
    ic73.classifier = new Decorate();
    ic73.name = "Decorate";
    ic73.properties.manageMissingValues = true;
    ic73.properties.minNumTrainingInstances = 15;
    this.indexDecorate = id - 1;
    cls.add(ic73);
    // Dagging SMO:
    InfoClassifier ic74 = new InfoClassifier(id++);
    Dagging dng = new Dagging();
    dng.setClassifier(new SMO());
    dng.setNumFolds(4);
    ic74.classifier = dng;
    ic74.properties.requireNumericDataset = true;
    ic74.properties.manageMultiClass = false;
    ic74.name = "Dagging SMO";
    cls.add(ic74);
    // IB1:
    InfoClassifier ic75 = new InfoClassifier(id++);
    ic75.classifier = new IB1();
    ic75.properties.manageMissingValues = true;
    ic75.name = "IB1";
    cls.add(ic75);
    // Simple Logistic:
    InfoClassifier ic76 = new InfoClassifier(id++);
    ic76.classifier = new SimpleLogistic();
    ic76.properties.requireNumericDataset = true;
    ic76.name = "Simple Logistic";
    cls.add(ic76);
    // VFI:
    InfoClassifier ic77 = new InfoClassifier(id++);
    ic77.classifier = new VFI();
    ic77.properties.manageMissingValues = true;
    ic77.name = "VFI";
    cls.add(ic77);
    // Check that each classifier satisfies its minimum-instance constraint.
    checkMinNumInstanes(cls, original.trainingSet);
    return cls;
}
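The Bayesian-network comment in the method above names the two filters BayesNet applies automatically. Applying them explicitly looks like this (a sketch, assuming data holds the training Instances):

import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Discretize;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

// Sketch: satisfy BayesNet's assumptions up front instead of relying
// on the automatic filtering (and STDERR warning) described above.
ReplaceMissingValues rmv = new ReplaceMissingValues();
rmv.setInputFormat(data);
Instances noMissing = Filter.useFilter(data, rmv);

Discretize disc = new Discretize();
disc.setInputFormat(noMissing);
Instances discrete = Filter.useFilter(noMissing, disc);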
From source file:javaapplication1.JavaApplication1.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    int numInstances = 20000;
    double curr_acc = 0;
    double curr_oba = 0;
    double curr_NB = 0;
    double[] acc_adacc = new double[numInstances];
    double[] acc_oba = new double[numInstances];
    double[] acc_NB = new double[numInstances];
    double[] pred_NB = new double[numInstances];
    Instance[] window = new Instance[numInstances];
    Classifier tr = new DecisionStump();
    Classifier lbay = new NaiveBayes();
    Classifier learner = new HoeffdingTree();
    ADWIN adw = new ADWIN(0.1);
    Classifier knn = new kNN();
    Classifier adacc = new ADACC();
    Classifier oba = new OzaBagAdwin();
    // Evaluator ev = new Accuracy();
    Classifier rep = new repro();
    Classifier tut = new DecisionStumpTutorial();
    // J48 tree = new J48();
    SMO svm = new SMO();
    ArffFileStream readfile = new ArffFileStream();
    readfile.arffFileOption.setValue("E:/PhD/data/SEA/comb_short.arff");
    readfile.prepareForUse();
    lbay.setModelContext(readfile.getHeader());
    learner.setModelContext(readfile.getHeader());
    tut.setModelContext(readfile.getHeader());
    knn.setModelContext(readfile.getHeader());
    adacc.setModelContext(readfile.getHeader());
    oba.setModelContext(readfile.getHeader());
    learner.prepareForUse();
    lbay.prepareForUse();
    tut.prepareForUse();
    knn.prepareForUse();
    adacc.prepareForUse();
    oba.prepareForUse();
    int numberSamplesCorrect = 0;
    int numberSamplesCorrectN = 0;
    int numberSamplesCorrectTR = 0;
    int numberSamplesCorrectknn = 0;
    int numberSamplesCorrectadacc = 0;
    int numberSamplesCorrectoba = 0;
    int numberSamples = 0;
    boolean isTesting = true;
    while (readfile.hasMoreInstances() && numberSamples < numInstances) {
        Instance trainInst = readfile.nextInstance();
        if (isTesting) {
            // ADWIN drift detection: if a change is detected, some of the
            // learners are trained on the current instance.
            if (adw.setInput(trainInst.value(0))) {
                System.out.println("Change Detected at " + numberSamples);
                System.out.println("window width is " + adw.getWidth());
                oba.trainOnInstance(trainInst);
                adacc.trainOnInstance(trainInst);
                lbay.trainOnInstance(trainInst);
            }
            if (learner.correctlyClassifies(trainInst)) {
                numberSamplesCorrect++;
            }
            if (lbay.correctlyClassifies(trainInst)) {
                numberSamplesCorrectN++;
                curr_NB = 1;
            } else {
                curr_NB = 0;
            }
            if (tut.correctlyClassifies(trainInst)) {
                numberSamplesCorrectTR++;
            }
            if (knn.correctlyClassifies(trainInst)) {
                numberSamplesCorrectknn++;
            }
            if (adacc.correctlyClassifies(trainInst)) {
                numberSamplesCorrectadacc++;
                curr_acc = 1;
            } else {
                curr_acc = 0;
            }
            if (oba.correctlyClassifies(trainInst)) {
                numberSamplesCorrectoba++;
                curr_oba = 1;
            } else {
                curr_oba = 0;
            }
        }
        // Maintain running (prequential) accuracies with an incremental mean.
        if (numberSamples == 0) {
            acc_adacc[numberSamples] = curr_acc;
            acc_oba[numberSamples] = curr_oba;
            acc_NB[numberSamples] = curr_NB;
        }
        if (numberSamples > 0) {
            acc_adacc[numberSamples] = acc_adacc[numberSamples - 1] + ((curr_acc - acc_adacc[numberSamples - 1]) / numberSamples);
            acc_oba[numberSamples] = acc_oba[numberSamples - 1] + ((curr_oba - acc_oba[numberSamples - 1]) / numberSamples);
            acc_NB[numberSamples] = acc_NB[numberSamples - 1] + ((curr_NB - acc_NB[numberSamples - 1]) / numberSamples);
        }
        numberSamples++;
        if (numberSamples < 5000) {
            oba.trainOnInstance(trainInst);
            adacc.trainOnInstance(trainInst);
            lbay.trainOnInstance(trainInst);
        }
        knn.trainOnInstance(trainInst);
        lbay.trainOnInstance(trainInst);
        learner.trainOnInstance(trainInst);
        tut.trainOnInstance(trainInst);
    }
    double accuracy = 100.0 * (double) numberSamplesCorrect / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with HoeffdingTree " + accuracy + "% accuracy");
    double accuracyN = 100.0 * (double) numberSamplesCorrectN / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with NaiveBayes " + accuracyN + "% accuracy");
    double accuracyTR = 100.0 * (double) numberSamplesCorrectTR / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with DecisionStump " + accuracyTR + "% accuracy");
    double accuracyADACC = 100.0 * (double) numberSamplesCorrectadacc / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with ADACC " + accuracyADACC + "% accuracy");
    double accuracyoba = 100.0 * (double) numberSamplesCorrectoba / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with OzaBagAdwin " + accuracyoba + "% accuracy");
    double accuracyknn = 100.0 * (double) numberSamplesCorrectknn / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with kNN " + accuracyknn + "% accuracy");
    // System.out.println("Mean:" + adw.getEstimation());
    // System.out.println("Variance:" + adw.getVariance());
    // System.out.println("Stand. dev:" + Math.sqrt(adw.getVariance()));
    System.out.println("Number of ADWIN drift detections: " + adw.getNumberDetections());
    String s = Arrays.toString(acc_adacc);
    s = s.substring(1, s.length() - 2);
    String s2 = Arrays.toString(pred_NB);
    s2 = s2.substring(1, s2.length() - 1);
    String sNB = Arrays.toString(acc_NB);
    sNB = sNB.substring(1, sNB.length() - 1);
    String csv = "E:/PhD/data/SEA/accres.csv";
    CSVWriter writer = new CSVWriter(new FileWriter(csv));
    List<String[]> data = new ArrayList<>();
    data.add(new String[] { s });
    // data.add(new String[] { s2 });
    writer.writeAll(data);
    System.out.println("CSV written successfully.");
    writer.close();
}
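The running accuracies in the loop above are maintained with an incremental-mean update, so no history of per-instance outcomes needs to be stored. Isolated as a sketch on a toy stream:

// Sketch: incremental mean, mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n.
double mean = 0.0;
int n = 0;
for (double x : new double[] { 1, 0, 1, 1 }) { // 1 = correct, 0 = wrong
    n++;
    mean += (x - mean) / n;
}
System.out.println("running accuracy: " + mean); // prints 0.75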
From source file:jjj.asap.sas.models1.job.BuildBasicMetaCostModels.java
License:Open Source License
@Override
protected void run() throws Exception {
    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }
    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();
    // bagged REPTrees
    Bagging baggedTrees = new Bagging();
    baggedTrees.setNumExecutionSlots(1);
    baggedTrees.setNumIterations(100);
    baggedTrees.setClassifier(new REPTree());
    baggedTrees.setCalcOutOfBag(false);
    prototypes.put("Bagged-REPTrees", baggedTrees);
    // bagged SMO
    Bagging baggedSVM = new Bagging();
    baggedSVM.setNumExecutionSlots(1);
    baggedSVM.setNumIterations(100);
    baggedSVM.setClassifier(new SMO());
    baggedSVM.setCalcOutOfBag(false);
    prototypes.put("Bagged-SMO", baggedSVM);
    // meta-cost model for Naive Bayes
    Bagging bagging = new Bagging();
    bagging.setNumExecutionSlots(1);
    bagging.setNumIterations(100);
    bagging.setClassifier(new NaiveBayes());
    CostSensitiveClassifier meta = new CostSensitiveClassifier();
    meta.setClassifier(bagging);
    meta.setMinimizeExpectedCost(true);
    // fixed: the original registered `bagging` here, so `meta` (and the
    // cost-matrix branch below) was never actually used
    prototypes.put("CostSensitive-MinimizeExpectedCost-NaiveBayes", meta);
    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();
    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {
        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {
            // special logic for meta cost
            Classifier alg = AbstractClassifier.makeCopy(prototype.getValue());
            if (alg instanceof CostSensitiveClassifier) {
                int essaySet = Contest.getEssaySet(dsn);
                String matrix = Contest.getRubrics(essaySet).size() == 3 ? "cost3.txt" : "cost4.txt";
                ((CostSensitiveClassifier) alg).setCostMatrix(new CostMatrix(new FileReader("/asap/sas/trunk/" + matrix)));
            }
            // use InfoGain to discard useless attributes
            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();
            classifier.setEvaluator(new InfoGainAttributeEval());
            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001);
            classifier.setSearch(ranker);
            classifier.setClassifier(alg);
            queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }
    // wait on completion
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();
}
From source file:jjj.asap.sas.models1.job.BuildBasicModels.java
License:Open Source License
@Override
protected void run() throws Exception {
    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }
    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();
    // bayes
    BayesNet net = new BayesNet();
    net.setEstimator(new BMAEstimator());
    prototypes.put("BayesNet", net);
    prototypes.put("NaiveBayes", new NaiveBayes());
    // functions
    prototypes.put("RBFNetwork", new RBFNetwork());
    prototypes.put("SMO", new SMO());
    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();
    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {
        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {
            // use InfoGain to discard useless attributes
            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();
            classifier.setEvaluator(new InfoGainAttributeEval());
            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001);
            classifier.setSearch(ranker);
            classifier.setClassifier(AbstractClassifier.makeCopy(prototype.getValue()));
            queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }
    // wait on completion
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();
}
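The InfoGain-plus-Ranker wrapper recurs in all of these jobs. Distilled to a standalone sketch (the 0.0001 threshold is the one used above; data is assumed to be loaded with its class index set):

// Sketch: wrap a base classifier so attributes whose information-gain
// score does not exceed the threshold are discarded before training.
AttributeSelectedClassifier asc = new AttributeSelectedClassifier();
asc.setEvaluator(new InfoGainAttributeEval());
Ranker ranker = new Ranker();
ranker.setThreshold(0.0001);
asc.setSearch(ranker);
asc.setClassifier(new SMO()); // any base classifier
asc.buildClassifier(data);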
From source file:jjj.asap.sas.models1.job.BuildRBFKernelModels.java
License:Open Source License
@Override
protected void run() throws Exception {
    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }
    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();
    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {
        SMO smo = new SMO();
        smo.setFilterType(new SelectedTag(SMO.FILTER_NONE, SMO.TAGS_FILTER));
        smo.setBuildLogisticModels(true);
        RBFKernel kernel = new RBFKernel();
        kernel.setGamma(0.05);
        smo.setKernel(kernel);
        AttributeSelectedClassifier asc = new AttributeSelectedClassifier();
        asc.setEvaluator(new InfoGainAttributeEval());
        Ranker ranker = new Ranker();
        ranker.setThreshold(0.01);
        asc.setSearch(ranker);
        asc.setClassifier(smo);
        queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-SMO-RBFKernel", asc, this.outputBucket)));
    }
    // wait on completion
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();
}
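A quick way to sanity-check the same RBF configuration before queueing a full build job is cross-validation; a sketch, with data assumed loaded and its class index set:

// Sketch: evaluate the RBF-kernel SMO configuration from the job above.
SMO smo = new SMO();
smo.setFilterType(new SelectedTag(SMO.FILTER_NONE, SMO.TAGS_FILTER));
smo.setBuildLogisticModels(true); // fit logistic models for probability estimates
RBFKernel kernel = new RBFKernel();
kernel.setGamma(0.05);
smo.setKernel(kernel);
Evaluation eval = new Evaluation(data);
eval.crossValidateModel(smo, data, 10, new java.util.Random(1));
System.out.println(eval.toSummaryString());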
From source file:kfst.classifier.WekaClassifier.java
License:Open Source License
/**
 * This method builds and evaluates a support vector machine (SVM)
 * classifier. SMO is used as the SVM implementation provided by the
 * Weka software.
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData the path of the test set
 * @param svmKernel the kernel to use
 *
 * @return the classification accuracy
 */
public static double SVM(String pathTrainData, String pathTestData, String svmKernel) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);
        SMO svm = new SMO();
        if (svmKernel.equals("Polynomial kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.PolyKernel.class.newInstance());
        } else if (svmKernel.equals("RBF kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.RBFKernel.class.newInstance());
        } else {
            svm.setKernel(weka.classifiers.functions.supportVector.Puk.class.newInstance());
        }
        svm.buildClassifier(dataTrain);
        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(svm, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}
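Calling the helper is a one-liner; the ARFF paths below are placeholders, and the kernel string must match one of the literals checked above:

// Sketch: invoking WekaClassifier.SVM with placeholder paths.
double acc = WekaClassifier.SVM("data/train.arff", "data/test.arff", "RBF kernel");
System.out.println("accuracy = " + acc + "%");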
From source file:KFST.featureSelection.embedded.SVMBasedMethods.MSVM_RFE.java
License:Open Source License
/**
 * Generates binary SVM classifiers, using a k-fold cross-validation
 * resampling strategy over the input data, based on the selected feature
 * subset.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][] buildSVM_KFoldCrossValidation(int[] selectedFeature) {
    double[][] weights = new double[numRun * kFoldValue][selectedFeature.length];
    int classifier = 0;
    for (int i = 0; i < numRun; i++) {
        double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);
        // shuffle the train set
        MathFunc.randomize(copyTrainSet);
        int numSampleInFold = copyTrainSet.length / kFoldValue;
        int remainder = copyTrainSet.length % kFoldValue;
        int indexStart = 0;
        for (int k = 0; k < kFoldValue; k++) {
            int indexEnd = indexStart + numSampleInFold;
            if (k < remainder) {
                indexEnd++;
            }
            double[][] subTrainSet = ArraysFunc.copyDoubleArray2D(copyTrainSet, indexStart, indexEnd);
            String nameDataCSV = TEMP_PATH + "dataCSV[" + i + "-" + k + "].csv";
            String nameDataARFF = TEMP_PATH + "dataARFF[" + i + "-" + k + "].arff";
            FileFunc.createCSVFile(subTrainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
            FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures, nameFeatures, numClass, classLabel);
            try {
                BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
                Instances dataTrain = new Instances(readerTrain);
                readerTrain.close();
                dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
                SMO svm = new SMO();
                svm.setC(parameterC);
                svm.setKernel(WekaSVMKernel.parse(kernelType));
                svm.buildClassifier(dataTrain);
                // Extract the linear weight vector for the class pair (0, 1)
                // from SMO's sparse representation.
                double[] weightsSparse = svm.sparseWeights()[0][1];
                int[] indicesSparse = svm.sparseIndices()[0][1];
                for (int m = 0; m < weightsSparse.length; m++) {
                    weights[classifier][indicesSparse[m]] = weightsSparse[m];
                }
            } catch (Exception ex) {
                Logger.getLogger(MSVM_RFE.class.getName()).log(Level.SEVERE, null, ex);
            }
            indexStart = indexEnd;
            classifier++;
        }
    }
    return weights;
}
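MSVM-RFE ranks features by aggregating these weight vectors; the usual SVM-RFE criterion (the squared weight w_i^2, here averaged over all numRun * kFoldValue classifiers) can be computed from the returned matrix. A sketch, with weights and selectedFeature as above:

// Sketch: per-feature ranking scores from the weight matrix; the
// feature with the lowest score is the next candidate for removal.
double[] score = new double[selectedFeature.length];
for (double[] w : weights) {
    for (int i = 0; i < score.length; i++) {
        score[i] += w[i] * w[i];
    }
}
for (int i = 0; i < score.length; i++) {
    score[i] /= weights.length; // mean squared weight
}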