Java examples for Machine Learning AI: Weka
Weka: combining ensemble classifiers with sampling
import java.io.FileWriter; import java.util.ArrayList; import weka.classifiers.Classifier; import weka.classifiers.CostMatrix; import weka.classifiers.Evaluation; import weka.classifiers.bayes.BayesNet; import weka.classifiers.bayes.NaiveBayes; import weka.classifiers.evaluation.Prediction; import weka.classifiers.functions.MultilayerPerceptron; import weka.classifiers.meta.AdaBoostM1; import weka.classifiers.meta.Bagging; import weka.classifiers.meta.MetaCost; import weka.classifiers.meta.Stacking; import weka.classifiers.meta.Vote; import weka.classifiers.trees.J48; import weka.classifiers.trees.REPTree; import weka.classifiers.trees.RandomForest; import weka.clusterers.SimpleKMeans; import weka.core.Instances; import weka.core.SelectedTag; import weka.core.converters.ConverterUtils.DataSource; import weka.filters.Filter; import weka.filters.supervised.instance.Resample; import weka.filters.supervised.instance.SMOTE; import weka.filters.unsupervised.attribute.Add; import weka.filters.unsupervised.instance.RemoveFrequentValues; import au.com.bytecode.opencsv.CSVWriter; import weka.classifiers.meta.RandomSubSpace; public class Main { public static void main(String[] args) throws Exception { //from w ww .j ava 2 s .c om Instances train = DataSource read("./train1.arff"); int cid1 = train.numAttributes() - 1; train.setClassIndex(cid1); Instances validation = DataSource read("./validation1.arff"); int cid2 = validation.numAttributes() - 1; validation.setClassIndex(cid2); Instances test = DataSource read("./test1.arff"); int cid3 = test.numAttributes() - 1; test.setClassIndex(cid3); //adaboost J48 J48 jtree1 = new J48(); AdaBoostM1 btree1 = new AdaBoostM1(); btree1.setClassifier(jtree1); //adaboost REPTree REPTree jtree2 = new REPTree(); AdaBoostM1 btree2 = new AdaBoostM1(); btree2.setClassifier(jtree2); //adaboost RF RandomForest jtree3 = new RandomForest(); AdaBoostM1 btree3 = new AdaBoostM1(); btree3.setClassifier(jtree3); //bagging J48 J48 jtree4 = new J48(); 
Bagging btree4 = new Bagging(); btree4.setClassifier(jtree4); //Bagging REPTree REPTree jtree5 = new REPTree(); Bagging btree5 = new Bagging(); btree5.setClassifier(jtree5); //Bagging RF RandomForest jtree6 = new RandomForest(); Bagging btree6 = new Bagging(); btree6.setClassifier(jtree6); btree6.buildClassifier(train); //Stacking NB BJ48 BRF NaiveBayes NB7 = new NaiveBayes(); J48 j48_tree7 = new J48(); RandomForest RF7 = new RandomForest(); Bagging b17 = new Bagging(); b17.setClassifier(j48_tree7); Bagging b27 = new Bagging(); b27.setClassifier(RF7); Stacking btree7 = new Stacking(); Classifier[] classifiers7 = new Classifier[2]; classifiers7[0] = b17; classifiers7[1] = b27; btree7.setClassifiers(classifiers7); btree7.setMetaClassifier(NB7); btree7.buildClassifier(train); //Stacking NB J48 MLP NaiveBayes NB8 = new NaiveBayes(); J48 j48_tree8 = new J48(); MultilayerPerceptron mp8 = new MultilayerPerceptron(); Stacking btree8 = new Stacking(); Classifier[] classifiers8 = new Classifier[2]; classifiers8[0] = j48_tree8; classifiers8[1] = mp8; btree8.setClassifiers(classifiers8); btree8.setMetaClassifier(NB8); //btree8.buildClassifier(train); //Cluster 15:85 Bagging RF //Remove fraud class instances Instances train9 = train; RemoveFrequentValues remove9 = new RemoveFrequentValues(); remove9.setInputFormat(train9); remove9.setAttributeIndex("last"); remove9.setNumValues(1); Instances train_ok9 = Filter.useFilter(train9, remove9); int cid49 = train_ok9.numAttributes() - 1; train_ok9.setClassIndex(cid49); //Remove ok class instances RemoveFrequentValues remove19 = new RemoveFrequentValues(); remove19.setInputFormat(train9); remove19.setAttributeIndex("last"); remove19.setNumValues(1); remove19.setUseLeastValues(true); Instances train_fraud9 = Filter.useFilter(train9, remove19); int cid59 = train_fraud9.numAttributes() - 1; train_fraud9.setClassIndex(cid59); //remove class attribute for clustering weka.filters.unsupervised.attribute.Remove filter9 = new 
weka.filters.unsupervised.attribute.Remove(); filter9.setAttributeIndices("" + (train_ok9.classIndex() + 1)); filter9.setInputFormat(train_ok9); Instances dataClusterer9 = Filter.useFilter(train_ok9, filter9); //cluster using K-means SimpleKMeans cluster9 = new SimpleKMeans(); cluster9.setNumClusters(409); cluster9.buildClusterer(dataClusterer9); train_ok9 = cluster9.getClusterCentroids(); //Add deleted class attribute Add add_attribute9 = new Add(); add_attribute9.setAttributeName("status"); add_attribute9.setAttributeIndex("last"); add_attribute9.setNominalLabels("0,1"); add_attribute9.setInputFormat(train_ok9); train_ok9 = Filter.useFilter(train_ok9, add_attribute9); for (int i = 0; i < train_ok9.numInstances(); i++) { train_ok9.instance(i).setValue(train_ok9.numAttributes() - 1, "0"); } int cid79 = train_ok9.numAttributes() - 1; train_ok9.setClassIndex(cid79); //combine train_ok and train_fraud for (int i = 0; i < train_fraud9.numInstances(); i++) train_ok9.add(train_fraud9.instance(i)); train9 = train_ok9; int cid69 = train9.numAttributes() - 1; train9.setClassIndex(cid69); //Bagging RF RandomForest rf9 = new RandomForest(); Bagging btree9 = new Bagging(); btree9.setClassifier(rf9); btree9.buildClassifier(train9); //cluster 33:67 stacking NB BJ48 BRF //Remove fraud class instances Instances train10 = train; RemoveFrequentValues remove10 = new RemoveFrequentValues(); remove10.setInputFormat(train10); remove10.setAttributeIndex("last"); remove10.setNumValues(1); Instances train_ok10 = Filter.useFilter(train10, remove10); int cid410 = train_ok10.numAttributes() - 1; train_ok10.setClassIndex(cid410); //Remove ok class instances RemoveFrequentValues remove110 = new RemoveFrequentValues(); remove110.setInputFormat(train10); remove110.setAttributeIndex("last"); remove110.setNumValues(1); remove110.setUseLeastValues(true); Instances train_fraud10 = Filter.useFilter(train10, remove110); int cid510 = train_fraud10.numAttributes() - 1; 
train_fraud10.setClassIndex(cid510); //remove class attribute for clustering weka.filters.unsupervised.attribute.Remove filter10 = new weka.filters.unsupervised.attribute.Remove(); filter10.setAttributeIndices("" + (train_ok10.classIndex() + 1)); filter10.setInputFormat(train_ok10); Instances dataClusterer10 = Filter.useFilter(train_ok10, filter10); //cluster using K-means SimpleKMeans cluster10 = new SimpleKMeans(); cluster10.setNumClusters(146); cluster10.buildClusterer(dataClusterer10); train_ok10 = cluster10.getClusterCentroids(); //Add deleted class attribute Add add_attribute10 = new Add(); add_attribute10.setAttributeName("status"); add_attribute10.setAttributeIndex("last"); add_attribute10.setNominalLabels("0,1"); //SelectedTag value= //add_attribute.setAttributeType(value); add_attribute10.setInputFormat(train_ok10); train_ok10 = Filter.useFilter(train_ok10, add_attribute10); for (int i = 0; i < train_ok10.numInstances(); i++) { train_ok10.instance(i).setValue(train_ok10.numAttributes() - 1, "0"); } int cid710 = train_ok10.numAttributes() - 1; train_ok10.setClassIndex(cid710); //combine train_ok and train_fraud for (int i = 0; i < train_fraud10.numInstances(); i++) train_ok10.add(train_fraud10.instance(i)); train10 = train_ok10; int cid610 = train10.numAttributes() - 1; train10.setClassIndex(cid610); //Stacking NB BJ48 BRF NaiveBayes NB10 = new NaiveBayes(); J48 j48_tree10 = new J48(); RandomForest RF10 = new RandomForest(); Bagging b110 = new Bagging(); b110.setClassifier(j48_tree10); Bagging b210 = new Bagging(); b210.setClassifier(RF10); Stacking btree10 = new Stacking(); Classifier[] classifiers10 = new Classifier[2]; classifiers10[0] = b110; classifiers10[1] = b210; btree10.setClassifiers(classifiers10); btree10.setMetaClassifier(NB10); btree10.buildClassifier(train10); //Cluster 50:50 Bagging RF //Remove fraud class instances Instances train11 = train; RemoveFrequentValues remove11 = new RemoveFrequentValues(); remove11.setInputFormat(train11); 
remove11.setAttributeIndex("last"); remove11.setNumValues(1); Instances train_ok11 = Filter.useFilter(train11, remove11); int cid411 = train_ok11.numAttributes() - 1; train_ok11.setClassIndex(cid411); //Remove ok class instances RemoveFrequentValues remove111 = new RemoveFrequentValues(); remove111.setInputFormat(train11); remove111.setAttributeIndex("last"); remove111.setNumValues(1); remove111.setUseLeastValues(true); Instances train_fraud11 = Filter.useFilter(train11, remove111); int cid511 = train_fraud11.numAttributes() - 1; train_fraud11.setClassIndex(cid511); //remove class attribute for clustering weka.filters.unsupervised.attribute.Remove filter11 = new weka.filters.unsupervised.attribute.Remove(); filter11.setAttributeIndices("" + (train_ok11.classIndex() + 1)); filter11.setInputFormat(train_ok11); Instances dataClusterer11 = Filter.useFilter(train_ok11, filter11); //cluster using K-means SimpleKMeans cluster11 = new SimpleKMeans(); cluster11.setNumClusters(72); cluster11.buildClusterer(dataClusterer11); train_ok11 = cluster11.getClusterCentroids(); //Add deleted class attribute Add add_attribute11 = new Add(); add_attribute11.setAttributeName("status"); add_attribute11.setAttributeIndex("last"); add_attribute11.setNominalLabels("0,1"); //SelectedTag value= //add_attribute.setAttributeType(value); add_attribute11.setInputFormat(train_ok11); train_ok11 = Filter.useFilter(train_ok11, add_attribute11); for (int i = 0; i < train_ok11.numInstances(); i++) { train_ok11.instance(i).setValue(train_ok11.numAttributes() - 1, "0"); } int cid711 = train_ok11.numAttributes() - 1; train_ok11.setClassIndex(cid711); //combine train_ok and train_fraud for (int i = 0; i < train_fraud11.numInstances(); i++) train_ok11.add(train_fraud11.instance(i)); train11 = train_ok11; int cid611 = train11.numAttributes() - 1; train11.setClassIndex(cid611); //Bagging with RF RandomForest rf11 = new RandomForest(); Bagging btree11 = new Bagging(); btree11.setClassifier(rf11); 
btree11.buildClassifier(train11); //Resampling bagging REPTree //Resampling 17:83 Instances train12 = train; Resample rs12 = new Resample(); rs12.setBiasToUniformClass(0.3); rs12.setInputFormat(train12); rs12.setSampleSizePercent(100); train12 = Filter.useFilter(train12, rs12); System.out.println(train12.numInstances()); //Bagging REPTree REPTree jtree12 = new REPTree(); Bagging btree12 = new Bagging(); btree12.setClassifier(jtree12); btree12.buildClassifier(train12); //SMOTE100 Stacking NB BJ48 BRF Instances train13 = train; NaiveBayes NB13 = new NaiveBayes(); J48 j48_tree13 = new J48(); RandomForest RF13 = new RandomForest(); Bagging b113 = new Bagging(); b113.setClassifier(j48_tree13); Bagging b213 = new Bagging(); b213.setClassifier(RF13); //SMOTE SMOTE sm13 = new SMOTE(); sm13.setInputFormat(train13); sm13.setNearestNeighbors(5); sm13.setPercentage(100); train13 = Filter.useFilter(train13, sm13); //Stacking NB BJ48 BRF Stacking btree13 = new Stacking(); Classifier[] classifiers13 = new Classifier[2]; classifiers13[0] = b113; classifiers13[1] = b213; btree13.setClassifiers(classifiers13); btree13.setMetaClassifier(NB13); btree13.buildClassifier(train13); //SMOTE 500 Bagging J48 //SMOTE Instances train14 = train; SMOTE sm14 = new SMOTE(); sm14.setInputFormat(train14); sm14.setNearestNeighbors(5); sm14.setPercentage(500); train14 = Filter.useFilter(train14, sm14); //Bagging J48 J48 jtree14 = new J48(); Bagging btree14 = new Bagging(); btree14.setClassifier(jtree14); btree14.buildClassifier(train14); //Random Subspace J48 Instances train15 = train; J48 jtree15 = new J48(); RandomSubSpace btree15 = new RandomSubSpace(); btree15.setClassifier(jtree15); btree15.buildClassifier(train15); //Cluster 15:85 Bagging J48 //Remove fraud class instances Instances train16 = train; RemoveFrequentValues remove16 = new RemoveFrequentValues(); remove16.setInputFormat(train16); remove16.setAttributeIndex("last"); remove16.setNumValues(1); Instances train_ok16 = 
Filter.useFilter(train16, remove16); int cid416 = train_ok16.numAttributes() - 1; train_ok16.setClassIndex(cid416); //Remove ok class instances RemoveFrequentValues remove116 = new RemoveFrequentValues(); remove116.setInputFormat(train16); remove116.setAttributeIndex("last"); remove116.setNumValues(1); remove116.setUseLeastValues(true); Instances train_fraud16 = Filter.useFilter(train16, remove116); int cid516 = train_fraud16.numAttributes() - 1; train_fraud16.setClassIndex(cid516); //remove class attribute for clustering weka.filters.unsupervised.attribute.Remove filter16 = new weka.filters.unsupervised.attribute.Remove(); filter16.setAttributeIndices("" + (train_ok16.classIndex() + 1)); filter16.setInputFormat(train_ok16); Instances dataClusterer16 = Filter.useFilter(train_ok16, filter16); //cluster using K-means SimpleKMeans cluster16 = new SimpleKMeans(); cluster16.setNumClusters(409); cluster16.buildClusterer(dataClusterer16); train_ok16 = cluster16.getClusterCentroids(); //Add deleted class attribute Add add_attribute16 = new Add(); add_attribute16.setAttributeName("status"); add_attribute16.setAttributeIndex("last"); add_attribute16.setNominalLabels("0,1"); //SelectedTag value= //add_attribute.setAttributeType(value); add_attribute16.setInputFormat(train_ok16); train_ok16 = Filter.useFilter(train_ok16, add_attribute16); for (int i = 0; i < train_ok16.numInstances(); i++) { train_ok16.instance(i).setValue(train_ok16.numAttributes() - 1, "0"); } int cid716 = train_ok16.numAttributes() - 1; train_ok16.setClassIndex(cid716); //combine train_ok and train_fraud for (int i = 0; i < train_fraud16.numInstances(); i++) train_ok16.add(train_fraud16.instance(i)); train16 = train_ok16; int cid616 = train16.numAttributes() - 1; train16.setClassIndex(cid616); //Bagging J48 J48 jtree16 = new J48(); Bagging btree16 = new Bagging(); btree16.setClassifier(jtree16); btree16.buildClassifier(train16); //Cluster 33:67 Bagging J48 //Remove fraud class instances Instances train17 = 
train; RemoveFrequentValues remove17 = new RemoveFrequentValues(); remove17.setInputFormat(train17); remove17.setAttributeIndex("last"); remove17.setNumValues(1); Instances train_ok17 = Filter.useFilter(train17, remove17); int cid417 = train_ok17.numAttributes() - 1; train_ok17.setClassIndex(cid417); //Remove ok class instances RemoveFrequentValues remove117 = new RemoveFrequentValues(); remove117.setInputFormat(train17); remove117.setAttributeIndex("last"); remove117.setNumValues(1); remove117.setUseLeastValues(true); Instances train_fraud17 = Filter.useFilter(train17, remove117); int cid517 = train_fraud17.numAttributes() - 1; train_fraud17.setClassIndex(cid517); //remove class attribute for clustering weka.filters.unsupervised.attribute.Remove filter17 = new weka.filters.unsupervised.attribute.Remove(); filter17.setAttributeIndices("" + (train_ok17.classIndex() + 1)); filter17.setInputFormat(train_ok17); Instances dataClusterer17 = Filter.useFilter(train_ok17, filter17); //cluster using K-means SimpleKMeans cluster17 = new SimpleKMeans(); cluster17.setNumClusters(146); cluster17.buildClusterer(dataClusterer17); train_ok17 = cluster17.getClusterCentroids(); //Add deleted class attribute Add add_attribute17 = new Add(); add_attribute17.setAttributeName("status"); add_attribute17.setAttributeIndex("last"); add_attribute17.setNominalLabels("0,1"); //SelectedTag value= //add_attribute.setAttributeType(value); add_attribute17.setInputFormat(train_ok17); train_ok17 = Filter.useFilter(train_ok17, add_attribute17); for (int i = 0; i < train_ok17.numInstances(); i++) { train_ok17.instance(i).setValue(train_ok17.numAttributes() - 1, "0"); } int cid717 = train_ok17.numAttributes() - 1; train_ok17.setClassIndex(cid717); System.out.println(train_ok17.numInstances()); System.out.println(train_fraud17.numInstances()); //combine train_ok and train_fraud for (int i = 0; i < train_fraud17.numInstances(); i++) train_ok17.add(train_fraud17.instance(i)); train17 = train_ok17; int 
cid617 = train17.numAttributes() - 1; train17.setClassIndex(cid617); //Bagging J48 J48 jtree17 = new J48(); Bagging btree17 = new Bagging(); btree17.setClassifier(jtree17); btree17.buildClassifier(train17); //RF Instances train18 = train; RandomForest btree18 = new RandomForest(); btree18.setNumTrees(41); btree18.buildClassifier(train18); //Bayes NET Instances train19 = train; BayesNet btree19 = new BayesNet(); btree19.buildClassifier(train19); //MLP Instances train20 = train; MultilayerPerceptron btree20 = new MultilayerPerceptron(); btree20.buildClassifier(train20); //MetaCost Bagging RF Instances train21 = train; RandomForest rf21 = new RandomForest(); Bagging tree21 = new Bagging(); tree21.setClassifier(rf21); //tree.buildClassifier(train); //MetaCost CostMatrix cm21 = new CostMatrix(2); cm21.setElement(0, 1, 1); cm21.setElement(1, 0, 5); cm21.setElement(0, 0, 0); cm21.setElement(1, 1, 0); MetaCost btree21 = new MetaCost(); btree21.setClassifier(tree21); btree21.setCostMatrix(cm21); btree21.buildClassifier(train21); //VOTE: Combine multiple classifier Vote tree = new Vote(); Classifier[] classifiers = { btree3, btree4, btree5, btree6 }; tree.setClassifiers(classifiers); tree.addPreBuiltClassifier(btree15); tree.addPreBuiltClassifier(btree19); tree.addPreBuiltClassifier(btree17); tree.addPreBuiltClassifier(btree7); tree.addPreBuiltClassifier(btree11); tree.addPreBuiltClassifier(btree10); tree.addPreBuiltClassifier(btree18); tree.addPreBuiltClassifier(btree20); //combination criteria tree.setCombinationRule(new SelectedTag(Vote.AVERAGE_RULE, Vote.TAGS_RULES)); tree.buildClassifier(train); Evaluation eval = new Evaluation(train); eval.evaluateModel(tree, validation); System.out.println(eval.toSummaryString("\nResults_RF\n\n", false)); System.out.println(eval.toClassDetailsString()); System.out.println(eval.toMatrixString()); ArrayList<Prediction> al = eval.predictions(); ArrayList<String[]> as = new ArrayList<String[]>(al.size()); for (int i = 0; i < al.size(); 
i++) { String[] s = new String[1]; s[0] = al.get(i).toString(); s[0] = s[0].substring(9, 11); as.add(s); } ArrayList<String[]> li = new ArrayList<String[]>(al.size()); li.addAll(as); String csv = "./output.csv"; CSVWriter writer = new CSVWriter(new FileWriter(csv)); writer.writeAll(li); writer.close(); } }