List of usage examples for weka.classifiers.meta.AdaBoostM1
public AdaBoostM1()
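Before the project examples below, here is a minimal self-contained sketch of the constructor in use. The file name "data.arff", the last-attribute class index, and the iteration count are placeholder assumptions, not taken from any of the examples.

import weka.classifiers.meta.AdaBoostM1;
import weka.classifiers.trees.DecisionStump;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class AdaBoostM1Minimal {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff");   // placeholder path
        data.setClassIndex(data.numAttributes() - 1);    // assume class is the last attribute

        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(new DecisionStump());        // weak base learner
        boost.setNumIterations(10);                      // boosting rounds

        boost.buildClassifier(data);

        // Predict the class of the first instance
        double label = boost.classifyInstance(data.instance(0));
        System.out.println(data.classAttribute().value((int) label));
    }
}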
From source file:classify.Classifier.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // read in data
    try {
        DataSource input = new DataSource("no_missing_values.csv");
        Instances data = input.getDataSet();
        //Instances data = readFile("newfixed.txt");
        missingValuesRows(data);
        setAttributeValues(data);
        data.setClassIndex(data.numAttributes() - 1);

        // boosting
        AdaBoostM1 boosting = new AdaBoostM1();
        boosting.setNumIterations(25);
        boosting.setClassifier(new DecisionStump());

        // build the classifier
        boosting.buildClassifier(data);

        // evaluate using 10-fold cross validation
        Evaluation e1 = new Evaluation(data);
        e1.crossValidateModel(boosting, data, 10, new Random(1));

        DecimalFormat nf = new DecimalFormat("0.000");
        System.out.println("Results of Boosting with Decision Stumps:");
        System.out.println(boosting.toString());
        System.out.println("Results of Cross Validation:");
        System.out.println("Number of correctly classified instances: " + e1.correct() + " ("
                + nf.format(e1.pctCorrect()) + "%)");
        System.out.println("Number of incorrectly classified instances: " + e1.incorrect() + " ("
                + nf.format(e1.pctIncorrect()) + "%)");
        System.out.println("TP Rate: " + nf.format(e1.weightedTruePositiveRate() * 100) + "%");
        System.out.println("FP Rate: " + nf.format(e1.weightedFalsePositiveRate() * 100) + "%");
        System.out.println("Precision: " + nf.format(e1.weightedPrecision() * 100) + "%");
        System.out.println("Recall: " + nf.format(e1.weightedRecall() * 100) + "%");
        System.out.println();
        System.out.println("Confusion Matrix:");
        for (int i = 0; i < e1.confusionMatrix().length; i++) {
            for (int j = 0; j < e1.confusionMatrix()[0].length; j++) {
                System.out.print(e1.confusionMatrix()[i][j] + " ");
            }
            System.out.println();
        }
        System.out.println();
        System.out.println();
        System.out.println();

        // logistic regression
        Logistic l = new Logistic();
        l.buildClassifier(data);
        e1 = new Evaluation(data);
        e1.crossValidateModel(l, data, 10, new Random(1));
        System.out.println("Results of Logistic Regression:");
        System.out.println(l.toString());
        System.out.println("Results of Cross Validation:");
        System.out.println("Number of correctly classified instances: " + e1.correct() + " ("
                + nf.format(e1.pctCorrect()) + "%)");
        System.out.println("Number of incorrectly classified instances: " + e1.incorrect() + " ("
                + nf.format(e1.pctIncorrect()) + "%)");
        System.out.println("TP Rate: " + nf.format(e1.weightedTruePositiveRate() * 100) + "%");
        System.out.println("FP Rate: " + nf.format(e1.weightedFalsePositiveRate() * 100) + "%");
        System.out.println("Precision: " + nf.format(e1.weightedPrecision() * 100) + "%");
        System.out.println("Recall: " + nf.format(e1.weightedRecall() * 100) + "%");
        System.out.println();
        System.out.println("Confusion Matrix:");
        for (int i = 0; i < e1.confusionMatrix().length; i++) {
            for (int j = 0; j < e1.confusionMatrix()[0].length; j++) {
                System.out.print(e1.confusionMatrix()[i][j] + " ");
            }
            System.out.println();
        }
    } catch (Exception ex) {
        // data couldn't be read, so end program
        System.out.println("Exception thrown, program ending.");
    }
}
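As an aside (not part of the original file), the manual metric printing above can be shortened: WEKA's Evaluation class formats the summary statistics and confusion matrix itself. A sketch reusing the boosting and data variables from the example:

Evaluation e1 = new Evaluation(data);
e1.crossValidateModel(boosting, data, 10, new Random(1));
System.out.println(e1.toSummaryString("Results of Boosting with Decision Stumps:", false));
System.out.println(e1.toMatrixString("Confusion Matrix:")); // throws Exception for non-nominal classes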
From source file:com.ivanrf.smsspam.SpamClassifier.java
License:Apache License
private static FilteredClassifier initFilterClassifier(int wordsToKeep, String tokenizerOp,
        boolean useAttributeSelection, String classifierOp, boolean boosting) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setLowerCaseTokens(true);
    filter.setWordsToKeep(wordsToKeep);

    if (!tokenizerOp.equals(TOKENIZER_DEFAULT)) {
        // Make a tokenizer
        WordTokenizer wt = new WordTokenizer();
        if (tokenizerOp.equals(TOKENIZER_COMPLETE))
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}");
        else // TOKENIZER_COMPLETE_NUMBERS
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}|~0123456789");
        filter.setTokenizer(wt);
    }

    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);

    if (useAttributeSelection) {
        AttributeSelection as = new AttributeSelection();
        as.setEvaluator(new InfoGainAttributeEval());
        Ranker r = new Ranker();
        r.setThreshold(0);
        as.setSearch(r);
        MultiFilter mf = new MultiFilter();
        mf.setFilters(new Filter[] { filter, as });
        classifier.setFilter(mf);
    }

    if (classifierOp.equals(CLASSIFIER_SMO))
        classifier.setClassifier(new SMO());
    else if (classifierOp.equals(CLASSIFIER_NB))
        classifier.setClassifier(new NaiveBayes());
    else if (classifierOp.equals(CLASSIFIER_IB1))
        classifier.setClassifier(new IBk(1));
    else if (classifierOp.equals(CLASSIFIER_IB3))
        classifier.setClassifier(new IBk(3));
    else if (classifierOp.equals(CLASSIFIER_IB5))
        classifier.setClassifier(new IBk(5));
    else if (classifierOp.equals(CLASSIFIER_PART))
        classifier.setClassifier(new PART()); // takes a long time

    if (boosting) {
        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(classifier.getClassifier());
        classifier.setClassifier(boost); // very slow with NaiveBayes
    }

    return classifier;
}
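A sketch of how the returned classifier might be trained; the constants (TOKENIZER_DEFAULT, CLASSIFIER_SMO) are assumed from the surrounding class, and trainingData is an assumed Instances object with a string attribute and a nominal class:

FilteredClassifier fc = initFilterClassifier(1000, TOKENIZER_DEFAULT, true, CLASSIFIER_SMO, true);
fc.buildClassifier(trainingData); // the StringToWordVector filter is applied at training time
System.out.println(fc);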
From source file:fr.unice.i3s.rockflows.experiments.main.IntermediateExecutor.java
private List<InfoClassifier> inputClassifier(Dataset original) throws Exception {
    List<InfoClassifier> cls = new ArrayList<>();
    int id = 0;

    //LogisticRegression:
    InfoClassifier ic1 = new InfoClassifier(id++);
    ic1.classifier = new Logistic();
    ic1.name = "Logistic Regression";
    ic1.properties.requireNumericDataset = true;
    cls.add(ic1);

    //SVM:
    InfoClassifier ic2 = new InfoClassifier(id++);
    LibSVM ccc = new LibSVM();
    //disable
    ccc.setOptions(new String[] {
            "-J", //Turn off nominal to binary conversion.
            "-V"  //Turn off missing value replacement
    });
    //ccc.setSVMType(new SelectedTag(LibSVM.SVMTYPE_C_SVC, LibSVM.TAGS_SVMTYPE));
    //ccc.setKernelType(new SelectedTag(LibSVM.KERNELTYPE_RBF, LibSVM.TAGS_KERNELTYPE));
    //ccc.setEps(0.001); //tolerance
    ic2.classifier = ccc;
    ic2.name = "Svm";
    ic2.properties.requireNumericDataset = true;
    cls.add(ic2);

    //J48:
    InfoClassifier ic3 = new InfoClassifier(id++);
    ic3.classifier = new J48();
    ic3.name = "J48";
    ic3.properties.manageMissingValues = true;
    cls.add(ic3);

    //NBTree:
    InfoClassifier ic4 = new InfoClassifier(id++);
    ic4.classifier = new NBTree();
    ic4.name = "NBTree";
    ic4.properties.manageMissingValues = true;
    cls.add(ic4);

    //RandomForest:
    InfoClassifier ic5 = new InfoClassifier(id++);
    RandomForest ccc2 = new RandomForest();
    ccc2.setNumTrees(500);
    ccc2.setMaxDepth(0);
    ic5.classifier = ccc2;
    ic5.name = "Random Forest";
    ic5.properties.manageMissingValues = true;
    cls.add(ic5);

    //Logistic Model Trees (LMT):
    InfoClassifier ic6 = new InfoClassifier(id++);
    ic6.classifier = new LMT();
    ic6.name = "Logistic Model Tree";
    ic6.properties.manageMissingValues = true;
    cls.add(ic6);

    //Alternating Decision Trees (ADTree):
    InfoClassifier ic7 = new InfoClassifier(id++);
    if (original.trainingSet.numClasses() > 2) {
        MultiClassClassifier mc = new MultiClassClassifier();
        mc.setOptions(new String[] { "-M", "3" }); //1 vs 1
        mc.setClassifier(new ADTree());
        ic7.classifier = mc;
        ic7.name = "1-vs-1 Alternating Decision Tree";
    } else {
        ic7.classifier = new ADTree();
        ic7.name = "Alternating Decision Tree";
    }
    ic7.properties.manageMultiClass = false;
    ic7.properties.manageMissingValues = true;
    cls.add(ic7);

    //Naive Bayes:
    InfoClassifier ic8 = new InfoClassifier(id++);
    ic8.classifier = new NaiveBayes();
    ic8.name = "Naive Bayes";
    ic8.properties.manageMissingValues = true;
    cls.add(ic8);

    //Bayesian Networks:
    /*
     * All Bayes network algorithms implemented in Weka assume the following for the data set:
     * all variables are discrete finite variables. If you have a data set with continuous
     * variables, you can use the following filter to discretize them:
     * weka.filters.unsupervised.attribute.Discretize
     * No instances have missing values. If there are missing values in the data set, values
     * are filled in using the following filter:
     * weka.filters.unsupervised.attribute.ReplaceMissingValues
     * The first step performed by buildClassifier is checking if the data set fulfills those
     * assumptions. If those assumptions are not met, the data set is automatically filtered
     * and a warning is written to STDERR.
     */
    InfoClassifier ic9 = new InfoClassifier(id++);
    ic9.classifier = new BayesNet();
    ic9.name = "Bayesian Network";
    ic9.properties.requireNominalDataset = true;
    cls.add(ic9);

    //IBk
    InfoClassifier ic10 = new InfoClassifier(id++);
    ic10.classifier = new IBk();
    ic10.name = "IBk";
    ic10.properties.manageMissingValues = true;
    cls.add(ic10);

    //JRip:
    InfoClassifier ic11 = new InfoClassifier(id++);
    ic11.classifier = new JRip();
    ic11.name = "JRip";
    ic11.properties.manageMissingValues = true;
    cls.add(ic11);

    //MultilayerPerceptron (MLP):
    InfoClassifier ic12 = new InfoClassifier(id++);
    ic12.classifier = new MultilayerPerceptron();
    ic12.name = "Multilayer Perceptron";
    ic12.properties.requireNumericDataset = true;
    cls.add(ic12);

    //Bagging RepTree:
    InfoClassifier ic14 = new InfoClassifier(id++);
    REPTree base3 = new REPTree();
    Bagging ccc4 = new Bagging();
    ccc4.setClassifier(base3);
    ic14.classifier = ccc4;
    ic14.name = "Bagging RepTree";
    ic14.properties.manageMissingValues = true;
    cls.add(ic14);

    //Bagging J48
    InfoClassifier ic15 = new InfoClassifier(id++);
    Bagging ccc5 = new Bagging();
    ccc5.setClassifier(new J48());
    ic15.classifier = ccc5;
    ic15.name = "Bagging J48";
    ic15.properties.manageMissingValues = true;
    cls.add(ic15);

    //Bagging NBTree
    InfoClassifier ic16 = new InfoClassifier(id++);
    Bagging ccc6 = new Bagging();
    ccc6.setClassifier(new NBTree());
    ic16.classifier = ccc6;
    ic16.name = "Bagging NBTree";
    ic16.properties.manageMissingValues = true;
    cls.add(ic16);

    //Bagging OneR:
    InfoClassifier ic17 = new InfoClassifier(id++);
    Bagging ccc7 = new Bagging();
    ccc7.setClassifier(new OneR());
    ic17.classifier = ccc7;
    ic17.name = "Bagging OneR";
    ic17.properties.requireNominalDataset = true;
    ic17.properties.manageMissingValues = true;
    cls.add(ic17);

    //Bagging JRip
    InfoClassifier ic18 = new InfoClassifier(id++);
    Bagging ccc8 = new Bagging();
    ccc8.setClassifier(new JRip());
    ic18.classifier = ccc8;
    ic18.name = "Bagging JRip";
    ic18.properties.manageMissingValues = true;
    cls.add(ic18);

    //MultiboostAB DecisionStump
    InfoClassifier ic24 = new InfoClassifier(id++);
    MultiBoostAB ccc14 = new MultiBoostAB();
    ccc14.setClassifier(new DecisionStump());
    ic24.classifier = ccc14;
    ic24.name = "MultiboostAB DecisionStump";
    ic24.properties.manageMissingValues = true;
    cls.add(ic24);

    //MultiboostAB OneR
    InfoClassifier ic25 = new InfoClassifier(id++);
    MultiBoostAB ccc15 = new MultiBoostAB();
    ccc15.setClassifier(new OneR());
    ic25.classifier = ccc15;
    ic25.name = "MultiboostAB OneR";
    ic25.properties.requireNominalDataset = true;
    cls.add(ic25);

    //MultiboostAB J48
    InfoClassifier ic27 = new InfoClassifier(id++);
    MultiBoostAB ccc17 = new MultiBoostAB();
    ccc17.setClassifier(new J48());
    ic27.classifier = ccc17;
    ic27.name = "MultiboostAB J48";
    ic27.properties.manageMissingValues = true;
    cls.add(ic27);

    //MultiboostAB JRip
    InfoClassifier ic28 = new InfoClassifier(id++);
    MultiBoostAB ccc18 = new MultiBoostAB();
    ccc18.setClassifier(new JRip());
    ic28.classifier = ccc18;
    ic28.name = "MultiboostAB JRip";
    cls.add(ic28);

    //MultiboostAB NBTree
    InfoClassifier ic29 = new InfoClassifier(id++);
    MultiBoostAB ccc19 = new MultiBoostAB();
    ccc19.setClassifier(new NBTree());
    ic29.classifier = ccc19;
    ic29.name = "MultiboostAB NBTree";
    ic29.properties.manageMissingValues = true;
    cls.add(ic29);

    //RotationForest RandomTree
    InfoClassifier ic32 = new InfoClassifier(id++);
    RotationForest ccc21 = new RotationForest();
    RandomTree rtr5 = new RandomTree();
    rtr5.setMinNum(2);
    rtr5.setAllowUnclassifiedInstances(true);
    ccc21.setClassifier(rtr5);
    ic32.classifier = ccc21;
    ic32.name = "RotationForest RandomTree";
    ic32.properties.manageMissingValues = true;
    cls.add(ic32);

    //RotationForest J48:
    InfoClassifier ic33 = new InfoClassifier(id++);
    J48 base6 = new J48();
    RotationForest ccc22 = new RotationForest();
    ccc22.setClassifier(base6);
    ic33.classifier = ccc22;
    ic33.name = "RotationForest J48";
    ic33.properties.manageMissingValues = true;
    cls.add(ic33);

    //RandomCommittee RandomTree:
    InfoClassifier ic34 = new InfoClassifier(id++);
    RandomTree rtr4 = new RandomTree();
    rtr4.setMinNum(2);
    rtr4.setAllowUnclassifiedInstances(true);
    RandomCommittee ccc23 = new RandomCommittee();
    ccc23.setClassifier(rtr4);
    ic34.classifier = ccc23;
    ic34.name = "RandomCommittee RandomTree";
    ic34.properties.manageMissingValues = true;
    cls.add(ic34);

    //Classification via Clustering: SimpleKMeans
    //N.B.: it can't handle date attributes
    InfoClassifier ic35 = new InfoClassifier(id++);
    ClassificationViaClustering ccc24 = new ClassificationViaClustering();
    SimpleKMeans km = new SimpleKMeans();
    km.setNumClusters(original.trainingSet.numClasses());
    ccc24.setClusterer(km);
    ic35.classifier = ccc24;
    ic35.name = "Classification via Clustering: KMeans";
    ic35.properties.requireNumericDataset = true;
    cls.add(ic35);

    //Classification via Clustering: FarthestFirst
    InfoClassifier ic36 = new InfoClassifier(id++);
    ClassificationViaClustering ccc25 = new ClassificationViaClustering();
    FarthestFirst ff = new FarthestFirst();
    ff.setNumClusters(original.trainingSet.numClasses());
    ccc25.setClusterer(ff);
    ic36.classifier = ccc25;
    ic36.name = "Classification via Clustering: FarthestFirst";
    ic36.properties.requireNumericDataset = true;
    cls.add(ic36);

    //SMO
    InfoClassifier ic37 = new InfoClassifier(id++);
    ic37.classifier = new SMO();
    ic37.properties.requireNumericDataset = true;
    ic37.properties.manageMultiClass = false;
    ic37.name = "Smo";
    cls.add(ic37);

    //Random Subspace
    InfoClassifier ic38 = new InfoClassifier(id++);
    RandomSubSpace sub = new RandomSubSpace();
    sub.setClassifier(new REPTree());
    ic38.classifier = sub;
    ic38.name = "Random Subspaces of RepTree";
    ic38.properties.manageMissingValues = true;
    cls.add(ic38);

    //PART rule based
    InfoClassifier ic39 = new InfoClassifier(id++);
    PART p39 = new PART();
    p39.setOptions(new String[] { "-C", "0.5" });
    ic39.classifier = p39; // fixed: the original assigned a fresh PART(), discarding the "-C 0.5" option set above
    ic39.name = "PART";
    ic39.properties.manageMissingValues = true;
    cls.add(ic39);

    //Decision-Table / Naive Bayes
    InfoClassifier ic40 = new InfoClassifier(id++);
    ic40.classifier = new DTNB();
    ic40.name = "DTNB";
    ic40.properties.manageMissingValues = true;
    cls.add(ic40);

    //Ridor Rule based
    InfoClassifier ic41 = new InfoClassifier(id++);
    ic41.classifier = new Ridor();
    ic41.name = "Ridor";
    ic41.properties.manageMissingValues = true;
    cls.add(ic41);

    //Decision Table
    InfoClassifier ic42 = new InfoClassifier(id++);
    ic42.classifier = new DecisionTable();
    ic42.name = "Decision Table";
    ic42.properties.manageMissingValues = true;
    cls.add(ic42);

    //Conjunctive Rule
    InfoClassifier ic43 = new InfoClassifier(id++);
    ic43.classifier = new ConjunctiveRule();
    ic43.name = "Conjunctive Rule";
    ic43.properties.manageMissingValues = true;
    cls.add(ic43);

    //LogitBoost Decision Stump
    InfoClassifier ic44 = new InfoClassifier(id++);
    LogitBoost lb = new LogitBoost();
    lb.setOptions(new String[] { "-L", "1.79" });
    lb.setClassifier(new DecisionStump());
    ic44.classifier = lb;
    ic44.name = "LogitBoost Decision Stump";
    ic44.properties.manageMissingValues = true;
    cls.add(ic44);

    //Raced Incremental Logit Boost, Decision Stump
    InfoClassifier ic45 = new InfoClassifier(id++);
    RacedIncrementalLogitBoost rlb = new RacedIncrementalLogitBoost();
    rlb.setClassifier(new DecisionStump());
    ic45.classifier = rlb;
    ic45.name = "Raced Incremental Logit Boost, Decision Stumps";
    ic45.properties.manageMissingValues = true;
    cls.add(ic45);

    //AdaboostM1 decision stump
    InfoClassifier ic46 = new InfoClassifier(id++);
    AdaBoostM1 adm = new AdaBoostM1();
    adm.setClassifier(new DecisionStump());
    ic46.classifier = adm;
    ic46.name = "AdaboostM1, Decision Stumps";
    ic46.properties.manageMissingValues = true;
    cls.add(ic46);

    //AdaboostM1 J48
    InfoClassifier ic47 = new InfoClassifier(id++);
    AdaBoostM1 adm2 = new AdaBoostM1();
    adm2.setClassifier(new J48());
    ic47.classifier = adm2;
    ic47.name = "AdaboostM1, J48";
    ic47.properties.manageMissingValues = true;
    cls.add(ic47);

    //MultiboostAB Decision Table
    InfoClassifier ic48 = new InfoClassifier(id++);
    MultiBoostAB mba = new MultiBoostAB();
    mba.setClassifier(new DecisionTable());
    ic48.classifier = mba;
    ic48.name = "MultiboostAB, Decision Table";
    ic48.properties.manageMissingValues = true;
    cls.add(ic48);

    //Multiboost NaiveBayes
    InfoClassifier ic49 = new InfoClassifier(id++);
    MultiBoostAB mba2 = new MultiBoostAB();
    mba2.setClassifier(new NaiveBayes());
    ic49.classifier = mba2;
    ic49.name = "MultiboostAB, Naive Bayes";
    ic49.properties.manageMissingValues = true;
    cls.add(ic49);

    //Multiboost PART
    InfoClassifier ic50 = new InfoClassifier(id++);
    MultiBoostAB mba3 = new MultiBoostAB();
    mba3.setClassifier(new PART());
    ic50.classifier = mba3;
    ic50.name = "MultiboostAB, PART";
    ic50.properties.manageMissingValues = true;
    cls.add(ic50);

    //Multiboost Random Tree
    InfoClassifier ic51 = new InfoClassifier(id++);
    MultiBoostAB mba4 = new MultiBoostAB();
    RandomTree rtr3 = new RandomTree();
    rtr3.setMinNum(2);
    rtr3.setAllowUnclassifiedInstances(true);
    mba4.setClassifier(rtr3);
    ic51.classifier = mba4;
    ic51.name = "MultiboostAB, RandomTree";
    ic51.properties.manageMissingValues = true;
    cls.add(ic51);

    //Multiboost Rep Tree
    InfoClassifier ic52 = new InfoClassifier(id++);
    MultiBoostAB mba5 = new MultiBoostAB();
    mba5.setClassifier(new REPTree());
    ic52.classifier = mba5;
    ic52.name = "MultiboostAB, RepTree";
    ic52.properties.manageMissingValues = true;
    cls.add(ic52);

    //Bagging Decision Stump
    InfoClassifier ic53 = new InfoClassifier(id++);
    Bagging bag = new Bagging();
    bag.setClassifier(new DecisionStump());
    ic53.classifier = bag;
    ic53.name = "Bagging Decision Stump";
    ic53.properties.manageMissingValues = true;
    cls.add(ic53);

    //Bagging Decision Table
    InfoClassifier ic54 = new InfoClassifier(id++);
    Bagging bag1 = new Bagging();
    bag1.setClassifier(new DecisionTable());
    ic54.classifier = bag1;
    ic54.name = "Bagging Decision Table";
    ic54.properties.manageMissingValues = true;
    cls.add(ic54);

    //Bagging HyperPipes
    InfoClassifier ic55 = new InfoClassifier(id++);
    Bagging bag2 = new Bagging();
    bag2.setClassifier(new HyperPipes());
    ic55.classifier = bag2;
    ic55.name = "Bagging Hyper Pipes";
    cls.add(ic55);

    //Bagging Naive Bayes
    InfoClassifier ic56 = new InfoClassifier(id++);
    Bagging bag3 = new Bagging();
    bag3.setClassifier(new NaiveBayes());
    ic56.classifier = bag3;
    ic56.name = "Bagging Naive Bayes";
    ic56.properties.manageMissingValues = true;
    cls.add(ic56);

    //Bagging PART
    InfoClassifier ic57 = new InfoClassifier(id++);
    Bagging bag4 = new Bagging();
    bag4.setClassifier(new PART());
    ic57.classifier = bag4;
    ic57.name = "Bagging PART";
    ic57.properties.manageMissingValues = true;
    cls.add(ic57);

    //Bagging RandomTree
    InfoClassifier ic58 = new InfoClassifier(id++);
    Bagging bag5 = new Bagging();
    RandomTree rtr2 = new RandomTree();
    rtr2.setMinNum(2);
    rtr2.setAllowUnclassifiedInstances(true);
    bag5.setClassifier(rtr2);
    ic58.classifier = bag5;
    ic58.name = "Bagging RandomTree";
    ic58.properties.manageMissingValues = true;
    cls.add(ic58);

    //NNge
    InfoClassifier ic59 = new InfoClassifier(id++);
    NNge nng = new NNge();
    nng.setNumFoldersMIOption(1);
    nng.setNumAttemptsOfGeneOption(5);
    ic59.classifier = nng;
    ic59.name = "NNge";
    cls.add(ic59);

    //OrdinalClassClassifier J48
    InfoClassifier ic60 = new InfoClassifier(id++);
    OrdinalClassClassifier occ = new OrdinalClassClassifier();
    occ.setClassifier(new J48());
    ic60.classifier = occ;
    ic60.name = "OrdinalClassClassifier J48";
    ic60.properties.manageMissingValues = true;
    cls.add(ic60);

    //Hyper Pipes
    InfoClassifier ic61 = new InfoClassifier(id++);
    ic61.classifier = new HyperPipes();
    ic61.name = "Hyper Pipes";
    cls.add(ic61);

    //Classification via Regression, M5P used by default
    InfoClassifier ic62 = new InfoClassifier(id++);
    ic62.classifier = new ClassificationViaRegression();
    ic62.name = "Classification ViaRegression, M5P";
    ic62.properties.requireNumericDataset = true;
    cls.add(ic62);

    //RBF Network
    InfoClassifier ic64 = new InfoClassifier(id++);
    RBFNetwork rbf = new RBFNetwork();
    rbf.setRidge(0.00000001); //10^-8
    rbf.setNumClusters(original.trainingSet.numAttributes() / 2);
    ic64.classifier = rbf;
    ic64.name = "RBF Network";
    ic64.properties.requireNumericDataset = true;
    if (!original.properties.isStandardized) {
        ic64.properties.compatibleWithDataset = false;
    }
    cls.add(ic64);

    //RandomTree
    InfoClassifier ic66 = new InfoClassifier(id++);
    RandomTree rtr = new RandomTree();
    rtr.setMinNum(2);
    rtr.setAllowUnclassifiedInstances(true);
    ic66.classifier = rtr;
    ic66.name = "Random Tree";
    ic66.properties.manageMissingValues = true;
    cls.add(ic66);

    //RepTree
    InfoClassifier ic67 = new InfoClassifier(id++);
    REPTree rept = new REPTree();
    ic67.classifier = rept;
    ic67.name = "Rep Tree";
    ic67.properties.manageMissingValues = true;
    cls.add(ic67);

    //Decision Stump
    InfoClassifier ic68 = new InfoClassifier(id++);
    ic68.classifier = new DecisionStump();
    ic68.name = "Decision Stump";
    ic68.properties.manageMissingValues = true;
    cls.add(ic68);

    //OneR
    InfoClassifier ic69 = new InfoClassifier(id++);
    ic69.classifier = new OneR();
    ic69.name = "OneR";
    ic69.properties.requireNominalDataset = true;
    ic69.properties.manageMissingValues = true;
    cls.add(ic69);

    //LWL
    InfoClassifier ic71 = new InfoClassifier(id++);
    ic71.classifier = new LWL();
    ic71.name = "LWL";
    ic71.properties.manageMissingValues = true;
    cls.add(ic71);

    //Bagging LWL
    InfoClassifier ic72 = new InfoClassifier(id++);
    Bagging bg72 = new Bagging();
    bg72.setClassifier(new LWL());
    ic72.classifier = bg72;
    ic72.name = "Bagging LWL";
    ic72.properties.manageMissingValues = true;
    cls.add(ic72);

    //Decorate
    InfoClassifier ic73 = new InfoClassifier(id++);
    ic73.classifier = new Decorate();
    ic73.name = "Decorate";
    ic73.properties.manageMissingValues = true;
    ic73.properties.minNumTrainingInstances = 15;
    this.indexDecorate = id - 1;
    cls.add(ic73);

    //Dagging
    InfoClassifier ic74 = new InfoClassifier(id++);
    Dagging dng = new Dagging();
    dng.setClassifier(new SMO());
    dng.setNumFolds(4);
    ic74.classifier = dng;
    ic74.properties.requireNumericDataset = true;
    ic74.properties.manageMultiClass = false;
    ic74.name = "Dagging SMO";
    cls.add(ic74);

    //IB1
    InfoClassifier ic75 = new InfoClassifier(id++);
    ic75.classifier = new IB1();
    ic75.properties.manageMissingValues = true;
    ic75.name = "IB1";
    cls.add(ic75);

    //Simple Logistic
    InfoClassifier ic76 = new InfoClassifier(id++);
    ic76.classifier = new SimpleLogistic();
    ic76.properties.requireNumericDataset = true;
    ic76.name = "Simple Logistic";
    cls.add(ic76);

    //VFI
    InfoClassifier ic77 = new InfoClassifier(id++);
    ic77.classifier = new VFI();
    ic77.properties.manageMissingValues = true;
    ic77.name = "VFI";
    cls.add(ic77);

    //check if each classifier satisfies the constraint of min #instances
    checkMinNumInstanes(cls, original.trainingSet);
    return cls;
}
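The Bayes-network block comment in the method above recommends discretizing continuous attributes and replacing missing values before training. A minimal sketch (not from this file) of doing that explicitly with a FilteredClassifier, where data is an assumed Instances object with its class index set:

import weka.classifiers.bayes.BayesNet;
import weka.classifiers.meta.FilteredClassifier;
import weka.filters.Filter;
import weka.filters.MultiFilter;
import weka.filters.unsupervised.attribute.Discretize;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

// Chain the two preprocessing filters the comment mentions
MultiFilter prep = new MultiFilter();
prep.setFilters(new Filter[] { new ReplaceMissingValues(), new Discretize() });

FilteredClassifier bn = new FilteredClassifier();
bn.setFilter(prep);
bn.setClassifier(new BayesNet());
bn.buildClassifier(data); // filters are fitted on the training data, then BayesNet is trained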
From source file:hero.unstable.util.classification.wekaClassifier.java
public wekaClassifier(String nameClassifier, String classifierOpt, int seed, int folds) throws Exception {
    String[] opts = classifierOpt.split(" "); // note: parsed but never applied to the classifier
    this.seed = seed;
    this.folds = folds;

    // Create classifier
    if (nameClassifier.equals("AdaBoostM1")) {
        this.classifier = new AdaBoostM1();
    } else if (nameClassifier.equals("J48")) {
        this.classifier = new J48(); // fixed: the original mistakenly instantiated AdaBoostM1 here as well
    } else if (nameClassifier.equals("RandomForest")) {
        this.classifier = new RandomForest();
    } else if (nameClassifier.equals("Bayes")) {
        this.classifier = new BayesNet();
    } else if (nameClassifier.equals("knn")) {
        this.classifier = new IBk();
    } else if (nameClassifier.equals("ZeroR")) {
        this.classifier = new ZeroR();
    } else if (nameClassifier.equals("NN")) {
        this.classifier = new MultilayerPerceptron();
    } else {
        this.classifier = new ZeroR();
    }
    this.nameClassifier = classifier.getClass().getName();
}
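If the parsed options were meant to be applied, a sketch of one way to do so at the end of the constructor; this assumes the chosen classifiers implement weka.core.OptionHandler, which the standard WEKA classifiers do:

// Hypothetical addition, not in the original constructor
if (classifierOpt != null && !classifierOpt.trim().isEmpty()) {
    ((weka.core.OptionHandler) this.classifier).setOptions(opts);
}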
From source file:meddle.TrainModelByDomainOS.java
License:Open Source License
/**
 * Given the classifierName, return a classifier
 *
 * @param classifierName
 *            e.g. J48, Bagging etc.
 */
public static Classifier getClassifier(String classifierName) {
    Classifier classifier = null;
    if (classifierName.equals("J48")) {
        J48 j48 = new J48();
        j48.setUnpruned(true);
        classifier = j48;
    } else if (classifierName.equals("AdaBoostM1")) {
        AdaBoostM1 adm = new AdaBoostM1();
        adm.setNumIterations(10);
        J48 j48 = new J48();
        adm.setClassifier(j48);
        classifier = adm;
    } else if (classifierName.equals("Bagging")) {
        Bagging bagging = new Bagging();
        bagging.setNumIterations(10);
        J48 j48 = new J48();
        bagging.setClassifier(j48);
        classifier = bagging;
    } else if (classifierName.equals("Stacking")) {
        Stacking stacking = new Stacking();
        stacking.setMetaClassifier(new Logistic());
        Classifier cc[] = new Classifier[2];
        cc[0] = new J48();
        cc[1] = new IBk();
        stacking.setClassifiers(cc);
        classifier = stacking;
    } else if (classifierName.equals("AdditiveRegression")) {
        AdditiveRegression ar = new AdditiveRegression();
        ar.setClassifier(new J48());
        classifier = ar;
    } else if (classifierName.equals("LogitBoost")) {
        LogitBoost lb = new LogitBoost();
        lb.setClassifier(new J48());
        classifier = lb;
    }
    return classifier;
}
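A sketch of using this factory; data is an assumed Instances object with its class index set:

Classifier c = getClassifier("AdaBoostM1"); // 10 boosting rounds over J48, per the factory above
if (c != null) {          // the factory returns null for unknown names
    c.buildClassifier(data); // throws Exception
}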
From source file:mulan.classifier.transformation.AdaBoostMH.java
License:Open Source License
/**
 * Default constructor
 */
public AdaBoostMH() {
    super(new AdaBoostM1());
}
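A sketch of training this multi-label learner with Mulan; the ARFF and label-XML paths are placeholders:

import mulan.classifier.transformation.AdaBoostMH;
import mulan.data.MultiLabelInstances;

MultiLabelInstances mlData = new MultiLabelInstances("dataset.arff", "labels.xml");
AdaBoostMH learner = new AdaBoostMH(); // boosts with the wrapped AdaBoostM1
learner.build(mlData);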
From source file:org.mcennis.graphrat.algorithm.machinelearning.MultiInstanceSVM.java
License:Open Source License
/**
 * Generate music predictions for a user as follows:
 * Calculate all artists A present in the data set.
 * Create a data set containing two numeric attributes (typically generated by the
 * AddBasicInterestLink and AddMusicLinks algorithms), a boolean for every artist
 * and a boolean class variable. These fields are populated as follows:
 * <br>
 * For each artist, generate a 2-class classifier.
 * <br>
 * For every user, for every friend of the user:
 * the first two fields are the interest and music link (0 if absent),
 * the artist fields are the music listened to by the friend,
 * and the final field is whether or not the user listens to the music specified.
 *
 * For memory reasons, not all training data is used.
 * FIXME: hard coded to a maximum 160 positive instances - should be a parameter
 */
public void execute(Graph g) {
    artists = g.getActor((String) parameter[2].getValue());
    java.util.Arrays.sort(artists);
    user = g.getActor((String) parameter[7].getValue());
    fireChange(Scheduler.SET_ALGORITHM_COUNT, artists.length);
    // correctlyClassified = new int[user.length];
    // totalClassified = new int[user.length];
    // totalPresent = new int[user.length];
    // java.util.Arrays.fill(correctlyClassified, 0);
    // java.util.Arrays.fill(totalClassified, 0);
    // java.util.Arrays.fill(totalPresent, 0);
    // for (int i = 0; i < user.length; ++i) {
    //     Link[] given = g.getLinkBySource((String) parameter[3].getValue(), user[i]);
    //     if (given != null) {
    //         totalPresent[i] = given.length;
    //     }
    // }
    int totalPerFile = countTotal(g);
    for (int i = 0; i < artists.length; ++i) {
        try {
            if (i % 10 == 0) {
                Logger.getLogger(MultiInstanceSVM.class.getName()).log(Level.INFO,
                        "Evaluating for artist " + artists[i].getID() + " " + i + " of " + artists.length);
                fireChange(Scheduler.SET_ALGORITHM_PROGRESS, i);
            }
            Instances dataSet = createDataSet(artists);
            int totalThisArtist = totalYes(g, artists[i]);
            int positiveSkipCount = 1;
            if ((((Integer) parameter[10].getValue()).intValue() != 0)
                    && (totalThisArtist > ((Integer) parameter[10].getValue()))) {
                positiveSkipCount = (totalThisArtist / 160) + 1;
            }
            if (totalThisArtist > 0) {
                int skipValue = (int) ((((Double) parameter[11].getValue()).doubleValue() * totalPerFile)
                        / (totalThisArtist / positiveSkipCount));
                if (skipValue <= 0) {
                    skipValue = 1;
                }
                if (!(Boolean) parameter[6].getValue()) {
                    skipValue = 1;
                }
                addInstances(g, dataSet, artists[i], skipValue, positiveSkipCount);
                // Classifier classifier = getClassifier();
                AdaBoostM1 classifier = new AdaBoostM1();
                try {
                    Logger.getLogger(MultiInstanceSVM.class.getName()).log(Level.FINER, "Building Classifier");
                    classifier.buildClassifier(dataSet);
                    Logger.getLogger(MultiInstanceSVM.class.getName()).log(Level.FINER, "Evaluating Classifier");
                    evaluateClassifier(classifier, dataSet, g, artists[i]);
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
                classifier = null;
            } else {
                Logger.getLogger(MultiInstanceSVM.class.getName()).log(Level.WARNING,
                        "Artist '" + artists[i].getID() + "' has no users listening to them");
            }
            dataSet = null;
        } catch (java.lang.OutOfMemoryError e) {
            Logger.getLogger(MultiInstanceSVM.class.getName()).log(Level.WARNING,
                    "Artist " + artists[i].getID() + " (" + i + ") ran out of memory");
            // System.gc();
        }
    }
    // double precision = 0.0;
    // double precisionSum = 0.0;
    // double precisionSquared = 0.0;
    // double recall = 0.0;
    // double recallSum = 0.0;
    // double recallSquared = 0.0;
    // for (int i = 0; i < correctlyClassified.length; ++i) {
    //     if (totalClassified[i] > 0) {
    //         precision = ((double) correctlyClassified[i]) / ((double) totalClassified[i]);
    //     } else {
    //         precision = 0.0;
    //     }
    //     precisionSum += precision;
    //     precisionSquared += precision * precision;
    // }
    // for (int i = 0; i < totalPresent.length; ++i) {
    //     if (totalPresent[i] > 0) {
    //         recall = ((double) correctlyClassified[i]) / ((double) totalPresent[i]);
    //     } else {
    //         recall = 0;
    //     }
    //     recallSum += recall;
    //     recallSquared += recall * recall;
    // }
    // double sd = ((correctlyClassified.length * precisionSquared) - precisionSum * precisionSum) / correctlyClassified.length;
    // double mean = precisionSum / correctlyClassified.length;
    // System.out.println("Positive Precision\t" + mean);
    // System.out.println("Positive Precision SD\t" + sd);
    // sd = ((correctlyClassified.length * recallSquared) - recallSum * recallSum) / correctlyClassified.length;
    // mean = recallSum / correctlyClassified.length;
    // System.out.println("Positive Recall\t" + mean);
    // System.out.println("Positive Recall SD\t" + sd);
}
From source file:org.mcennis.graphrat.algorithm.machinelearning.SVM.java
License:Open Source License
public void execute(Graph g) {
    artists = g.getActor((String) parameter[2].getValue());
    java.util.Arrays.sort(artists);
    user = g.getActor((String) parameter[7].getValue());
    fireChange(Scheduler.SET_ALGORITHM_COUNT, artists.length);
    int totalPerFile = user.length;
    for (int i = 0; i < artists.length; ++i) {
        try {
            if (i % 10 == 0) {
                System.out.println(
                        "Evaluating for artist " + artists[i].getID() + " " + i + " of " + artists.length);
                fireChange(Scheduler.SET_ALGORITHM_PROGRESS, i);
            }
            Instances dataSet = createDataSet(artists);
            int totalThisArtist = g.getLinkByDestination((String) parameter[3].getValue(), artists[i]).length;
            int positiveSkipCount = 1;
            if ((((Integer) parameter[10].getValue()).intValue() != 0)
                    && (totalThisArtist > ((Integer) parameter[10].getValue()))) {
                positiveSkipCount = (totalThisArtist / 160) + 1;
            }
            if (totalThisArtist > 0) {
                int skipValue = (int) ((((Double) parameter[11].getValue()).doubleValue() * totalPerFile)
                        / (totalThisArtist / positiveSkipCount));
                if (skipValue <= 0) {
                    skipValue = 1;
                }
                if (!(Boolean) parameter[6].getValue()) {
                    skipValue = 1;
                }
                addInstances(g, dataSet, artists[i], skipValue, positiveSkipCount);
                // Classifier classifier = getClassifier();
                AdaBoostM1 classifier = new AdaBoostM1();
                try {
                    // System.out.println("Building Classifier");
                    classifier.buildClassifier(dataSet);
                    // System.out.println("Evaluating Classifier");
                    evaluateClassifier(classifier, dataSet, g, artists[i]);
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
                classifier = null;
            }
            dataSet = null;
        } catch (java.lang.OutOfMemoryError e) {
            System.err.println("Artist " + artists[i].getID() + " (" + i + ") ran out of memory");
            System.gc();
        }
    }
}
From source file:tclass.ABClassifier.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpAB_TC2 thisExp = new ExpAB_TC2();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);
    EventExtractor evExtractor = settings.getEventExtractor();

    // Global data is likely to be included in every model; so we
    // might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();
    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);

    // And we might as well extract the events.
    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    // System.out.println(trainEventData.toString());

    // Now we want the clustering algorithms only to cluster
    // instances of each class. Make an array of clusterers,
    // one per class.
    int numTestStreams = testEventData.size();
    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer[] eventClusterers = new EventClusterer[numClasses];
    // And now, initialise.
    for (int i = 0; i < numClasses; i++) {
        // The new way:
        eventClusterers[i] = settings.getEventClusterer();
        // The old way:
        // eventClusterers[i] = new EventClusterer(new StreamTokenizer(
        //         new FileReader(thisExp.evClusterDesc)), domDesc, eventDescVec);
        // System.out.println(eventClusterers[i]);
    }

    // Segment the data.
    ClassStreamEventsVec[] trainStreamsByClass = new ClassStreamEventsVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainStreamsByClass[i] = new ClassStreamEventsVec();
        trainStreamsByClass[i].setClassVec(new ClassificationVec());
        trainStreamsByClass[i].setStreamEventsVec(new StreamEventsVec());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    // And now load it up.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    for (int i = 0; i < numTrainStreams; i++) {
        int currentClass = trainEventCV.elAt(i).getRealClass();
        trainStreamsByClass[currentClass].add(trainEventSEV.elAt(i), trainEventCV.elAt(i));
    }
    ClusterVecI[] clustersByClass = new ClusterVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        clustersByClass[i] = eventClusterers[i].clusterEvents(trainStreamsByClass[i]);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + i + " complete");
        Debug.dp(Debug.PROGRESS, "Clusters for class: " + domDesc.getClassDescVec().getClassLabel(i) + " are:");
        Debug.dp(Debug.PROGRESS, eventClusterers[i].getMapping());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete.");

    // But wait! There's more! There is always more.
    // The first thing was only useful for clustering.
    // Now attribution. We want to attribute all the data. So we are going
    // to have one dataset for each learner.
    // First set up the attributors.
    Attributor[] attribsByClass = new Attributor[numClasses];
    for (int i = 0; i < numClasses; i++) {
        attribsByClass[i] = new Attributor(domDesc, clustersByClass[i], eventClusterers[i].getDescription());
        Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + i + " complete.");
    }
    ClassStreamAttValVecI[] trainEventAtts = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testEventAtts = new ClassStreamAttValVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainEventAtts[i] = attribsByClass[i].attribute(trainStreamData, trainEventData);
        testEventAtts[i] = attribsByClass[i].attribute(testStreamData, testEventData);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution of " + i + " complete.");
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();
    ClassStreamAttValVecI[] trainAttsByClass = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testAttsByClass = new ClassStreamAttValVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainAttsByClass[i] = c.combine(trainGlobalData, trainEventAtts[i]);
        testAttsByClass[i] = c.combine(testGlobalData, testEventAtts[i]);
    }

    // Now we have to do some garbage collection.
    trainStreamData = null;
    testStreamData = null;
    eventClusterers = null;
    trainEventSEV = null;
    trainEventCV = null;
    clustersByClass = null;
    attribsByClass = null;
    System.gc();

    // So now we have the raw data in the correct form for each attributor.
    // And now, we can construct a learner for each case.
    // Well, for now, I'm going to do something completely crazy.
    // Let's run each classifier nonetheless over the whole data
    // ... and see what the hell happens. Maybe some voting scheme
    // is possible!! This is a strange form of ensemble classifier.
    // Each naive bayes algorithm only gets one
    Debug.setDebugLevel(Debug.PROGRESS);
    AdaBoostM1[] dtLearners = new AdaBoostM1[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtLearners[i] = new AdaBoostM1();
        dtLearners[i].setClassifier(new J48());
        Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class " + i);
        Instances data = WekaBridge.makeInstances(trainAttsByClass[i], "Train " + i);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
        dtLearners[i].buildClassifier(data);
        Debug.dp(Debug.PROGRESS, "Learnt tree: \n" + dtLearners[i].toString());
    }
    ABClassifier[] dtClassifiers = new ABClassifier[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtClassifiers[i] = new ABClassifier(dtLearners[i]);
        // System.out.println(nbClassifiers[i].toString());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete.");

    // Now test on training data (each one)
    /*
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        ClassificationVecI classvi = (ClassificationVecI) trainAttsByClass[i].getClassVec().clone();
        StreamAttValVecI savvi = trainAttsByClass[i].getStreamAttValVec();
        for (int j = 0; j < trainAttsByClass[i].size(); j++) {
            nbClassifiers[i].classify(savvi.elAt(j), classvi.elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < classvi.size(); j++) {
            System.out.print(classvi.elAt(j).toString());
            if (classvi.elAt(j).getRealClass() == classvi.elAt(j).getPredictedClass()) {
                numCorrect++;
            }
        }
        System.out.println("Train accuracy for " + className + " classifier: " + numCorrect + " of "
                + numTrainStreams + " (" + numCorrect * 100.0 / numTrainStreams + "%)");
    }
    */

    System.out.println(">>> Testing stage <<<");
    // First, print the results of using the straight testers.
    ClassificationVecI[] classns = new ClassificationVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        classns[i] = (ClassificationVecI) testAttsByClass[i].getClassVec().clone();
        StreamAttValVecI savvi = testAttsByClass[i].getStreamAttValVec();
        Instances data = WekaBridge.makeInstances(testAttsByClass[i], "Test " + i);
        for (int j = 0; j < numTestStreams; j++) {
            dtClassifiers[i].classify(data.instance(j), classns[i].elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < numTestStreams; j++) {
            System.out.print(classns[i].elAt(j).toString());
            if (classns[i].elAt(j).getRealClass() == classns[i].elAt(j).getPredictedClass()) {
                numCorrect++;
            }
        }
        System.out.println("Test accuracy for " + className + " classifier: " + numCorrect + " of "
                + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)");
    }

    // Now do voting. This is a hack solution.
    int numCorrect = 0;
    for (int i = 0; i < numTestStreams; i++) {
        int[] votes = new int[numClasses];
        int realClass = classns[0].elAt(i).getRealClass();
        String realClassName = domDesc.getClassDescVec().getClassLabel(realClass);
        for (int j = 0; j < numClasses; j++) {
            int thisPrediction = classns[j].elAt(i).getPredictedClass();
            // if (thisPrediction == j) {
            //     votes[thisPrediction] += 2;
            // } else {
            votes[thisPrediction]++;
            // }
        }
        int maxIndex = -1;
        int maxVotes = 0;
        String voteRes = "[ ";
        for (int j = 0; j < numClasses; j++) {
            voteRes += votes[j] + " ";
            if (votes[j] > maxVotes) {
                maxIndex = j;
                maxVotes = votes[j];
            }
        }
        voteRes += "]";
        // Now print the result:
        String predictedClassName = domDesc.getClassDescVec().getClassLabel(maxIndex);
        if (maxIndex == realClass) {
            System.out.println("Class " + realClassName + " CORRECTLY classified with " + maxVotes
                    + " votes. Votes: " + voteRes);
            numCorrect++;
        } else {
            System.out.println("Class " + realClassName + " INCORRECTLY classified as " + predictedClassName
                    + " with " + maxVotes + " votes. Votes: " + voteRes);
        }
    }
    System.out.println("Final voted accuracy: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");
}
From source file:wekimini.learning.AdaboostModelBuilder.java
public AdaboostModelBuilder() {
    classifier = new AdaBoostM1();
    // ((AdaBoostM1) classifier).setClassifier(new DecisionStump());
    ((AdaBoostM1) classifier).setClassifier(new J48());
    ((AdaBoostM1) classifier).setNumIterations(defaultNumRounds);
}