List of usage examples for weka.classifiers.meta.FilteredClassifier
public FilteredClassifier()
From source file:miRdup.WekaModule.java
License:Open Source License
public static void trainModel(File arff, String keyword) {
    dec.setMaximumFractionDigits(3);
    System.out.println("\nTraining model on file " + arff);
    try {
        // load data
        DataSource source = new DataSource(arff.toString());
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        PrintWriter pwout = new PrintWriter(new FileWriter(keyword + Main.modelExtension + "Output"));
        PrintWriter pwroc = new PrintWriter(new FileWriter(keyword + Main.modelExtension + "roc.arff"));

        // remove the ID attribute (first column)
        Remove rm = new Remove();
        rm.setAttributeIndices("1");
        FilteredClassifier fc = new FilteredClassifier();
        fc.setFilter(rm);

        // train model SVM
        // weka.classifiers.functions.LibSVM model = new weka.classifiers.functions.LibSVM();
        // model.setOptions(weka.core.Utils.splitOptions("-S 0 -K 2 -D 3 -G 0.0 -R 0.0 -N 0.5 -M 40.0 -C 1.0 -E 0.0010 -P 0.1 -B"));

        // train model MultilayerPerceptron
        // weka.classifiers.functions.MultilayerPerceptron model = new weka.classifiers.functions.MultilayerPerceptron();
        // model.setOptions(weka.core.Utils.splitOptions("-L 0.3 -M 0.2 -N 500 -V 0 -S 0 -E 20 -H a"));

        // train model AdaBoost on RIPPER
        // weka.classifiers.meta.AdaBoostM1 model = new weka.classifiers.meta.AdaBoostM1();
        // model.setOptions(weka.core.Utils.splitOptions("weka.classifiers.meta.AdaBoostM1 -P 100 -S 1 -I 10 -W weka.classifiers.rules.JRip -- -F 10 -N 2.0 -O 5 -S 1"));

        // train model AdaBoost on FURIA
        // weka.classifiers.meta.AdaBoostM1 model = new weka.classifiers.meta.AdaBoostM1();
        // model.setOptions(weka.core.Utils.splitOptions("weka.classifiers.meta.AdaBoostM1 -P 100 -S 1 -I 10 -W weka.classifiers.rules.FURIA -- -F 10 -N 2.0 -O 5 -S 1 -p 0 -s 0"));

        // train model AdaBoost on J48 trees
        // weka.classifiers.meta.AdaBoostM1 model = new weka.classifiers.meta.AdaBoostM1();
        // model.setOptions(weka.core.Utils.splitOptions("-P 100 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -C 0.25 -M 2"));

        // train model AdaBoost on Random Forest trees
        weka.classifiers.meta.AdaBoostM1 model = new weka.classifiers.meta.AdaBoostM1();
        model.setOptions(weka.core.Utils
                .splitOptions("-P 100 -S 1 -I 10 -W weka.classifiers.trees.RandomForest -- -I 50 -K 0 -S 1"));

        if (Main.debug) {
            System.out.print("Model options: " + model.getClass().getName().trim() + " ");
        }
        System.out.print(model.getClass() + " ");
        for (String s : model.getOptions()) {
            System.out.print(s + " ");
        }
        pwout.print("Model options: " + model.getClass().getName().trim() + " ");
        for (String s : model.getOptions()) {
            pwout.print(s + " ");
        }

        // build model
        // model.buildClassifier(data);
        fc.setClassifier(model);
        fc.buildClassifier(data);

        // 10-fold cross-validation of the model
        Evaluation eval = new Evaluation(data);
        // eval.crossValidateModel(model, data, 10, new Random(1));
        StringBuffer sb = new StringBuffer();
        eval.crossValidateModel(fc, data, 10, new Random(1), sb, new Range("first,last"), false);
        // System.out.println(sb);
        pwout.println(sb);
        pwout.flush();

        // output
        pwout.println("\n" + eval.toSummaryString());
        System.out.println(eval.toSummaryString());
        pwout.println(eval.toClassDetailsString());
        System.out.println(eval.toClassDetailsString());

        // extract the important values from the class details string
        String ev[] = eval.toClassDetailsString().split("\n");
        String ptmp[] = ev[3].trim().split(" ");
        String ntmp[] = ev[4].trim().split(" ");
        String avgtmp[] = ev[5].trim().split(" ");
        ArrayList<String> p = new ArrayList<String>();
        ArrayList<String> n = new ArrayList<String>();
        ArrayList<String> avg = new ArrayList<String>();
        for (String s : ptmp) {
            if (!s.trim().isEmpty()) {
                p.add(s);
            }
        }
        for (String s : ntmp) {
            if (!s.trim().isEmpty()) {
                n.add(s);
            }
        }
        for (String s : avgtmp) {
            if (!s.trim().isEmpty()) {
                avg.add(s);
            }
        }
        double tp = Double.parseDouble(p.get(0));
        double fp = Double.parseDouble(p.get(1));
        double tn = Double.parseDouble(n.get(0));
        double fn = Double.parseDouble(n.get(1));
        double auc = Double.parseDouble(avg.get(7));
        pwout.println("\nTP=" + tp + "\nFP=" + fp + "\nTN=" + tn + "\nFN=" + fn);
        System.out.println("\nTP=" + tp + "\nFP=" + fp + "\nTN=" + tn + "\nFN=" + fn);

        // specificity, sensitivity, Matthews correlation coefficient, prediction accuracy
        double sp = ((tn) / (tn + fp));
        double se = ((tp) / (tp + fn));
        double acc = ((tp + tn) / (tp + tn + fp + fn));
        // all four sums belong inside the square root of the MCC denominator
        double mcc = ((tp * tn) - (fp * fn)) / Math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
        String output = "\nse=" + dec.format(se).replace(",", ".") + "\nsp=" + dec.format(sp).replace(",", ".")
                + "\nACC=" + dec.format(acc).replace(",", ".") + "\nMCC=" + dec.format(mcc).replace(",", ".")
                + "\nAUC=" + dec.format(auc).replace(",", ".");
        pwout.println(output);
        System.out.println(output);
        pwout.println(eval.toMatrixString());
        System.out.println(eval.toMatrixString());
        pwout.flush();
        pwout.close();

        // save the model (note: only the inner classifier is serialized, not the Remove filter)
        System.out.println("Model saved: " + keyword + Main.modelExtension);
        weka.core.SerializationHelper.write(keyword + Main.modelExtension, fc.getClassifier() /* model */);

        // get the ROC curve
        ThresholdCurve tc = new ThresholdCurve();
        int classIndex = 0;
        Instances result = tc.getCurve(eval.predictions(), classIndex);
        pwroc.print(result.toString());
        pwroc.flush();
        pwroc.close();

        // draw curve
        // rocCurve(eval);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
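Note that trainModel serializes fc.getClassifier(), i.e. the boosted model without its Remove filter, so new instances must have the ID attribute stripped before prediction. A minimal loading sketch (the file names "mymodel.model" and "unseen.arff" are placeholders, not part of miRdup):

import weka.classifiers.Classifier;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

public class LoadModelSketch {
    public static void main(String[] args) throws Exception {
        // load the serialized classifier (the AdaBoostM1 model saved above)
        Classifier model = (Classifier) weka.core.SerializationHelper.read("mymodel.model");

        // load unseen data and strip the ID attribute, mirroring the training filter
        Instances raw = new DataSource("unseen.arff").getDataSet();
        raw.setClassIndex(raw.numAttributes() - 1);
        Remove rm = new Remove();
        rm.setAttributeIndices("1");
        rm.setInputFormat(raw);
        Instances data = Filter.useFilter(raw, rm);

        // classify each filtered instance with the loaded model
        for (int i = 0; i < data.numInstances(); i++) {
            double pred = model.classifyInstance(data.instance(i));
            System.out.println(raw.instance(i) + " -> " + data.classAttribute().value((int) pred));
        }
    }
}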
From source file:mlpoc.MLPOC.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        BufferedReader br;
        br = new BufferedReader(
                new FileReader("D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff"));
        Instances training_data = new Instances(br);
        br.close();
        training_data.setClassIndex(training_data.numAttributes() - 1);

        br = new BufferedReader(new FileReader("D:/Extra/B.E Project/agrodeploy/webapp/Data/TestFinal.arff"));
        Instances testing_data = new Instances(br);
        br.close();
        testing_data.setClassIndex(testing_data.numAttributes() - 1);

        String summary = training_data.toSummaryString();
        int number_samples = training_data.numInstances();
        int number_attributes_per_sample = training_data.numAttributes();
        System.out.println("Number of attributes in model = " + number_attributes_per_sample);
        System.out.println("Number of samples = " + number_samples);
        System.out.println("Summary: " + summary);
        System.out.println();

        // train a J48 tree wrapped in a FilteredClassifier
        J48 j48 = new J48();
        FilteredClassifier fc = new FilteredClassifier();
        fc.setClassifier(j48);
        fc.buildClassifier(training_data);

        System.out.println("Testing instances: " + testing_data.numInstances());
        for (int i = 0; i < testing_data.numInstances(); i++) {
            double pred = fc.classifyInstance(testing_data.instance(i));
            String s1 = testing_data.classAttribute().value((int) pred);
            System.out.println(testing_data.instance(i) + " Predicted value: " + s1);
        }

        Evaluation crossValidate = crossValidate(
                "D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff");
        DataSource source = new DataSource(
                "D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff");
        Instances data = source.getDataSet();
        System.out.println(data.numInstances());
        data.setClassIndex(data.numAttributes() - 1);
        // 1. meta-classifier
        useClassifier(data);
        // 2. filter
        useFilter(data);
    } catch (Exception ex) {
        Logger.getLogger(MLPOC.class.getName()).log(Level.SEVERE, null, ex);
    }
}
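The crossValidate, useClassifier and useFilter helpers are defined elsewhere in MLPOC.java and are not part of this excerpt. A minimal sketch of what a crossValidate helper along these lines could look like (the signature, base classifier and fold count are assumptions):

import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CrossValidateSketch {
    public static Evaluation crossValidate(String arffPath) throws Exception {
        Instances data = new DataSource(arffPath).getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        FilteredClassifier fc = new FilteredClassifier();
        fc.setClassifier(new J48());

        // stratified 10-fold cross-validation on a fresh copy of the classifier
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(fc, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        return eval;
    }
}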
From source file:mulan.classifier.meta.SubsetLearner.java
License:Open Source License
/**
 * We get the initial dataset through trainingSet. Then, for each split specified by splitOrder,
 * we remove the unneeded labels and train the classifiers, using a different method for
 * multi-label splits and single-label splits.
 *
 * @param trainingSet the initial {@link MultiLabelInstances} dataset
 * @throws Exception
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    int countSingle = 0, countMulti = 0;
    remove = new Remove[splitOrder.length];

    // Get values into absoluteIndicesToRemove
    int numofSplits = splitOrder.length; // number of sets the main set is going to be split into
    for (int r = 0; r < splitOrder.length; r++) { // initialization required to avoid a NullPointerException
        absoluteIndicesToRemove[r] = new int[numLabels - splitOrder[r].length];
    }

    // Initialize an array recording which labels we want to keep
    boolean[][] selected = new boolean[splitOrder.length][numLabels];
    for (int i = 0; i < numofSplits; i++) { // set true for the labels each split keeps
        for (int j = 0; j < splitOrder[i].length; j++) {
            selected[i][splitOrder[i][j]] = true;
        }
    }
    for (int i = 0; i < numofSplits; i++) { // collect the absolute indices of the labels to REMOVE
        int k = 0;
        for (int j = 0; j < numLabels; j++) {
            if (!selected[i][j]) {
                absoluteIndicesToRemove[i][k] = labelIndices[j];
                k++;
            }
        }
    }

    // Create the lists which will contain the learners
    multiLabelLearners = new ArrayList<MultiLabelLearner>();
    singleLabelLearners = new ArrayList<FilteredClassifier>();
    countSingle = 0; // reset the counters and reuse the variables
    countMulti = 0;

    for (int totalSplitNo = 0; totalSplitNo < splitOrder.length; totalSplitNo++) {
        debug("Building set " + (totalSplitNo + 1) + "/" + splitOrder.length);
        if (splitOrder[totalSplitNo].length > 1) {
            // Remove the unneeded labels
            Instances trainSubset = trainingSet.getDataSet();
            remove[totalSplitNo] = new Remove();
            remove[totalSplitNo].setAttributeIndicesArray(absoluteIndicesToRemove[totalSplitNo]);
            remove[totalSplitNo].setInputFormat(trainSubset);
            remove[totalSplitNo].setInvertSelection(false);
            trainSubset = Filter.useFilter(trainSubset, remove[totalSplitNo]);

            // Reintegrate the dataset and train the learner
            multiLabelLearners.add(baseMultiLabelLearner.makeCopy());
            multiLabelLearners.get(countMulti).build(trainingSet.reintegrateModifiedDataSet(trainSubset));
            countMulti++;
        } else {
            debug("Single Label model.");
            // Initialize the FilteredClassifiers
            singleLabelLearners.add(new FilteredClassifier());
            singleLabelLearners.get(countSingle).setClassifier(AbstractClassifier.makeCopy(baseClassifier));
            Instances trainSubset = trainingSet.getDataSet();

            // Set the remove filter for the FilteredClassifiers
            remove[totalSplitNo] = new Remove();
            remove[totalSplitNo].setAttributeIndicesArray(absoluteIndicesToRemove[totalSplitNo]);
            remove[totalSplitNo].setInputFormat(trainSubset);
            remove[totalSplitNo].setInvertSelection(false);
            singleLabelLearners.get(countSingle).setFilter(remove[totalSplitNo]);

            // Set the remaining label as the class index
            trainSubset.setClassIndex(labelIndices[splitOrder[totalSplitNo][0]]);

            // Train
            singleLabelLearners.get(countSingle).buildClassifier(trainSubset);
            countSingle++;
        }
    }
}
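The keep-mask inversion above is compact but easy to misread. The same logic as a small standalone helper (illustrative only, not part of Mulan): it turns the labels a split keeps into the absolute attribute indices to remove.

static int[] indicesToRemove(int[] split, int[] labelIndices) {
    boolean[] keep = new boolean[labelIndices.length];
    for (int j : split) {
        keep[j] = true; // mark the labels this split retains
    }
    int[] result = new int[labelIndices.length - split.length];
    int k = 0;
    for (int j = 0; j < labelIndices.length; j++) {
        if (!keep[j]) {
            result[k++] = labelIndices[j]; // absolute index of a label to drop
        }
    }
    return result;
}

For example, with labelIndices = {10, 11, 12} and split = {1}, the helper returns {10, 12}: only label 1 survives, so the other two label attributes are removed.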
From source file:mulan.classifier.transformation.BinaryRelevance.java
License:Open Source License
protected void buildInternal(MultiLabelInstances train) throws Exception {
    numLabels = train.getNumLabels();
    ensemble = new FilteredClassifier[numLabels];
    Instances trainingData = train.getDataSet();
    for (int i = 0; i < numLabels; i++) {
        ensemble[i] = new FilteredClassifier();
        ensemble[i].setClassifier(AbstractClassifier.makeCopy(baseClassifier));

        // Indices of attributes to remove: all label indices except the current one
        int[] indicesToRemove = new int[numLabels - 1];
        int counter2 = 0;
        for (int counter1 = 0; counter1 < numLabels; counter1++) {
            if (labelIndices[counter1] != labelIndices[i]) {
                indicesToRemove[counter2] = labelIndices[counter1];
                counter2++;
            }
        }

        Remove remove = new Remove();
        remove.setAttributeIndicesArray(indicesToRemove);
        remove.setInputFormat(trainingData);
        remove.setInvertSelection(false);
        ensemble[i].setFilter(remove);

        trainingData.setClassIndex(labelIndices[i]);
        // debug("Building model " + (i + 1) + "/" + numLabels);
        System.out.println("Building model " + (i + 1) + "/" + numLabels);
        ensemble[i].buildClassifier(trainingData);
    }
}
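Stripped of the Mulan types, the pattern above amounts to: one FilteredClassifier per label, each hiding the other label columns behind its own Remove filter. A self-contained restatement in plain Weka (the J48 base classifier and the label-index layout are assumptions for illustration):

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.filters.unsupervised.attribute.Remove;

public class BinaryRelevanceSketch {
    public static FilteredClassifier[] train(Instances data, int[] labelIndices) throws Exception {
        Classifier base = new J48();
        FilteredClassifier[] ensemble = new FilteredClassifier[labelIndices.length];
        for (int i = 0; i < labelIndices.length; i++) {
            // remove every label column except the current one
            int[] toRemove = new int[labelIndices.length - 1];
            int k = 0;
            for (int j = 0; j < labelIndices.length; j++) {
                if (j != i) {
                    toRemove[k++] = labelIndices[j];
                }
            }
            Remove remove = new Remove();
            remove.setAttributeIndicesArray(toRemove);
            remove.setInputFormat(data);

            ensemble[i] = new FilteredClassifier();
            ensemble[i].setClassifier(AbstractClassifier.makeCopy(base));
            ensemble[i].setFilter(remove);
            data.setClassIndex(labelIndices[i]); // the current label becomes the class
            ensemble[i].buildClassifier(data);
        }
        return ensemble;
    }
}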
From source file:mulan.classifier.transformation.ClassifierChain.java
License:Open Source License
protected void buildInternal(MultiLabelInstances train) throws Exception {
    if (chain == null) {
        chain = new int[numLabels];
        for (int i = 0; i < numLabels; i++) {
            chain[i] = i;
        }
    }
    Instances trainDataset;
    numLabels = train.getNumLabels();
    ensemble = new FilteredClassifier[numLabels];
    trainDataset = train.getDataSet();
    for (int i = 0; i < numLabels; i++) {
        ensemble[i] = new FilteredClassifier();
        ensemble[i].setClassifier(AbstractClassifier.makeCopy(baseClassifier));

        // Indices of attributes to remove: the first model removes numLabels - 1
        // label attributes, the second numLabels - 2, and so on.
        // The loop walks the chain from its last element.
        int[] indicesToRemove = new int[numLabels - 1 - i];
        int counter2 = 0;
        for (int counter1 = 0; counter1 < numLabels - i - 1; counter1++) {
            indicesToRemove[counter1] = labelIndices[chain[numLabels - 1 - counter2]];
            counter2++;
        }

        Remove remove = new Remove();
        remove.setAttributeIndicesArray(indicesToRemove);
        remove.setInputFormat(trainDataset);
        remove.setInvertSelection(false);
        ensemble[i].setFilter(remove);

        trainDataset.setClassIndex(labelIndices[chain[i]]);
        debug("Building model " + (i + 1) + "/" + numLabels);
        System.out.println("Building model " + (i + 1) + "/" + numLabels);
        ensemble[i].buildClassifier(trainDataset);
    }
}
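The prediction side is not shown in this excerpt. Conceptually, each link of the chain predicts its label and the prediction is written back into the instance, so later links can use it as a feature. A simplified sketch of that idea (an assumption, not the Mulan source; it glosses over the per-link class-index and header bookkeeping the real implementation handles):

import weka.classifiers.meta.FilteredClassifier;
import weka.core.Instance;

class ChainPredictionSketch {
    static double[] predict(FilteredClassifier[] ensemble, int[] chain, int[] labelIndices,
            Instance instance) throws Exception {
        Instance copy = (Instance) instance.copy();
        double[] predictions = new double[chain.length];
        for (int i = 0; i < chain.length; i++) {
            // each link's internal Remove filter hides the labels later in the chain
            predictions[chain[i]] = ensemble[i].classifyInstance(copy);
            // write the prediction back so later links see it as a feature
            copy.setValue(labelIndices[chain[i]], predictions[chain[i]]);
        }
        return predictions;
    }
}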
From source file:mulan.classifier.transformation.MultiLabelStacking.java
License:Open Source License
/**
 * Builds the base-level classifiers.
 * Their predictions are gathered in the baseLevelPredictions member.
 *
 * @param trainingSet
 * @throws Exception
 */
public void buildBaseLevel(MultiLabelInstances trainingSet) throws Exception {
    train = new Instances(trainingSet.getDataSet());
    baseLevelData = new Instances[numLabels];
    baseLevelEnsemble = AbstractClassifier.makeCopies(baseClassifier, numLabels);
    if (normalize) {
        maxProb = new double[numLabels];
        minProb = new double[numLabels];
        Arrays.fill(minProb, 1);
    }
    // initialize the table holding the predictions of the first-level
    // classifiers for each label for every instance of the training set
    baseLevelPredictions = new double[train.numInstances()][numLabels];

    for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) {
        debug("Label: " + labelIndex);
        // transform the dataset according to the BR method
        baseLevelData[labelIndex] = BinaryRelevanceTransformation.transformInstances(train, labelIndices,
                labelIndices[labelIndex]);
        // attach indexes in order to keep track of the original positions
        baseLevelData[labelIndex] = new Instances(attachIndexes(baseLevelData[labelIndex]));
        // prepare the transformed dataset for stratified x-fold cv
        Random random = new Random(1);
        baseLevelData[labelIndex].randomize(random);
        baseLevelData[labelIndex].stratify(numFolds);
        debug("Creating meta-data");
        for (int j = 0; j < numFolds; j++) {
            debug("Label=" + labelIndex + ", Fold=" + j);
            Instances subtrain = baseLevelData[labelIndex].trainCV(numFolds, j, random);
            // create a filtered meta classifier, used to ignore
            // the index attribute in the build process;
            // perform stratified x-fold cv and get predictions
            // for each class for every instance
            FilteredClassifier fil = new FilteredClassifier();
            fil.setClassifier(baseLevelEnsemble[labelIndex]);
            Remove remove = new Remove();
            remove.setAttributeIndices("first");
            remove.setInputFormat(subtrain);
            fil.setFilter(remove);
            fil.buildClassifier(subtrain);

            // classify the test instances of this fold
            Instances subtest = baseLevelData[labelIndex].testCV(numFolds, j);
            for (int i = 0; i < subtest.numInstances(); i++) {
                double[] distribution = fil.distributionForInstance(subtest.instance(i));
                // ensure correct predictions both for class values {0,1} and {1,0}
                Attribute classAttribute = baseLevelData[labelIndex].classAttribute();
                baseLevelPredictions[(int) subtest.instance(i)
                        .value(0)][labelIndex] = distribution[classAttribute.indexOfValue("1")];
                if (normalize) {
                    if (distribution[classAttribute.indexOfValue("1")] > maxProb[labelIndex]) {
                        maxProb[labelIndex] = distribution[classAttribute.indexOfValue("1")];
                    }
                    if (distribution[classAttribute.indexOfValue("1")] < minProb[labelIndex]) {
                        minProb[labelIndex] = distribution[classAttribute.indexOfValue("1")];
                    }
                }
            }
        }
        // now we can detach the indexes from the first-level datasets
        baseLevelData[labelIndex] = detachIndexes(baseLevelData[labelIndex]);

        debug("Building base classifier on full data");
        // build the base classifier on the full training data
        baseLevelEnsemble[labelIndex].buildClassifier(baseLevelData[labelIndex]);
        baseLevelData[labelIndex].delete();
    }
    if (normalize) {
        normalizePredictions();
    }
}
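attachIndexes and detachIndexes are private helpers not included in this excerpt. A plausible sketch of the idea (an assumption, not the Mulan source): prepend a numeric index attribute so each instance remembers its original position through randomization and stratification, then strip it again afterwards.

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Add;
import weka.filters.unsupervised.attribute.Remove;

class IndexingSketch {
    static Instances attachIndexes(Instances data) throws Exception {
        Add add = new Add();
        add.setAttributeIndex("first");
        add.setAttributeName("index");
        add.setInputFormat(data);
        Instances indexed = Filter.useFilter(data, add);
        for (int i = 0; i < indexed.numInstances(); i++) {
            indexed.instance(i).setValue(0, i); // remember the original position
        }
        return indexed;
    }

    static Instances detachIndexes(Instances data) throws Exception {
        Remove remove = new Remove();
        remove.setAttributeIndices("first");
        remove.setInputFormat(data);
        return Filter.useFilter(data, remove);
    }
}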
From source file:mulan.classifier.transformation.MultiLabelStacking.java
License:Open Source License
/**
 * Builds the ensemble of meta-level classifiers.
 *
 * @throws Exception
 */
public void buildMetaLevel() throws Exception {
    debug("Building the ensemble of the meta-level classifiers");

    for (int i = 0; i < numLabels; i++) {
        // creating the meta-level data
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        if (includeAttrs) { // create an ArrayList with numAttributes size
            for (int j = 0; j < train.numAttributes(); j++) {
                attributes.add(train.attribute(j));
            }
        } else { // create an ArrayList with numLabels size
            for (int j = 0; j < numLabels; j++) {
                attributes.add(train.attribute(labelIndices[j]));
            }
        }
        attributes.add(train.attribute(labelIndices[i]).copy("meta"));

        metaLevelData[i] = new Instances("Meta format", attributes, 0);
        metaLevelData[i].setClassIndex(metaLevelData[i].numAttributes() - 1);

        // add the meta instances
        for (int l = 0; l < train.numInstances(); l++) {
            double[] values = new double[metaLevelData[i].numAttributes()];
            if (includeAttrs) {
                // copy the original features
                for (int m = 0; m < featureIndices.length; m++) {
                    values[m] = train.instance(l).value(featureIndices[m]);
                }
                // copy the label confidences as additional features
                for (int m = 0; m < numLabels; m++) {
                    values[train.numAttributes() - numLabels + m] = baseLevelPredictions[l][m];
                }
            } else {
                for (int m = 0; m < numLabels; m++) {
                    values[m] = baseLevelPredictions[l][m];
                }
            }
            values[values.length - 1] = Double.parseDouble(
                    train.attribute(labelIndices[i]).value((int) train.instance(l).value(labelIndices[i])));
            Instance metaInstance = DataUtils.createInstance(train.instance(l), 1, values);
            metaInstance.setDataset(metaLevelData[i]);
            if (values[values.length - 1] > 0.5) {
                metaInstance.setClassValue("1");
            } else {
                metaInstance.setClassValue("0");
            }
            metaLevelData[i].add(metaInstance);
        }

        // we utilize a filtered classifier to prune uncorrelated labels
        metaLevelFilteredEnsemble[i] = new FilteredClassifier();
        metaLevelFilteredEnsemble[i].setClassifier(metaLevelEnsemble[i]);
        Remove remove = new Remove();
        if (topkCorrelated < numLabels) {
            remove.setAttributeIndicesArray(selectedAttributes[i]);
        } else {
            remove.setAttributeIndices("first-last");
        }
        remove.setInvertSelection(true);
        remove.setInputFormat(metaLevelData[i]);
        metaLevelFilteredEnsemble[i].setFilter(remove);

        debug("Building classifier for meta training set " + i);
        metaLevelFilteredEnsemble[i].buildClassifier(metaLevelData[i]);
        metaLevelData[i].delete();
    }
}
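Note the setInvertSelection(true) call: with inversion, the indices passed to Remove are the attributes that are kept, which is how the uncorrelated labels get pruned here. A tiny standalone demo of that idiom ("meta.arff" is a placeholder file name):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

public class KeepAttributesDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new DataSource("meta.arff").getDataSet();

        Remove keep = new Remove();
        keep.setAttributeIndicesArray(new int[] { 0, 2 }); // indices to KEEP...
        keep.setInvertSelection(true);                     // ...because the selection is inverted
        keep.setInputFormat(data);

        // the filtered copy contains only attributes 0 and 2
        Instances reduced = Filter.useFilter(data, keep);
        System.out.println(reduced.numAttributes() + " attributes kept");
    }
}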
From source file:mulan.regressor.transformation.RegressorChainSimple.java
License:Open Source License
protected void buildInternal(MultiLabelInstances train) throws Exception {
    // if no chain has been defined, create the default chain
    if (chain == null) {
        chain = new int[numLabels];
        for (int j = 0; j < numLabels; j++) {
            chain[j] = labelIndices[j];
        }
    }
    if (chainSeed != 0) { // a random chain will be created by shuffling the existing chain
        Random rand = new Random(chainSeed);
        ArrayList<Integer> chainAsList = new ArrayList<Integer>(numLabels);
        for (int j = 0; j < numLabels; j++) {
            chainAsList.add(chain[j]);
        }
        Collections.shuffle(chainAsList, rand);
        for (int j = 0; j < numLabels; j++) {
            chain[j] = chainAsList.get(j);
        }
    }
    debug("Using chain: " + Arrays.toString(chain));

    chainRegressors = new FilteredClassifier[numLabels];
    Instances trainDataset = train.getDataSet();
    for (int i = 0; i < numLabels; i++) {
        chainRegressors[i] = new FilteredClassifier();
        chainRegressors[i].setClassifier(AbstractClassifier.makeCopy(baseRegressor));

        // Indices of attributes to remove: the first model removes numLabels - 1
        // target attributes, the second numLabels - 2, and so on.
        // The loop walks the chain from its last element.
        int[] indicesToRemove = new int[numLabels - 1 - i];
        for (int counter1 = 0; counter1 < numLabels - i - 1; counter1++) {
            indicesToRemove[counter1] = chain[numLabels - 1 - counter1];
        }

        Remove remove = new Remove();
        remove.setAttributeIndicesArray(indicesToRemove);
        remove.setInvertSelection(false);
        remove.setInputFormat(trainDataset);
        chainRegressors[i].setFilter(remove);

        trainDataset.setClassIndex(chain[i]);
        debug("Building model " + (i + 1) + "/" + numLabels);
        chainRegressors[i].setDebug(true);
        chainRegressors[i].buildClassifier(trainDataset);
    }
}
From source file:mulan.regressor.transformation.SingleTargetRegressor.java
License:Open Source License
protected void buildInternal(MultiLabelInstances mlTrainSet) throws Exception {
    stRegressors = new FilteredClassifier[numLabels];
    // any changes are applied to a copy of the original dataset
    Instances trainSet = new Instances(mlTrainSet.getDataSet());
    for (int i = 0; i < numLabels; i++) {
        stRegressors[i] = new FilteredClassifier();
        stRegressors[i].setClassifier(AbstractClassifier.makeCopy(baseRegressor));

        // Indices of attributes to remove: all labelIndices except for the current index
        int[] indicesToRemove = new int[numLabels - 1];
        int counter2 = 0;
        for (int counter1 = 0; counter1 < numLabels; counter1++) {
            if (labelIndices[counter1] != labelIndices[i]) {
                indicesToRemove[counter2] = labelIndices[counter1];
                counter2++;
            }
        }

        Remove remove = new Remove();
        remove.setAttributeIndicesArray(indicesToRemove);
        remove.setInvertSelection(false);
        remove.setInputFormat(trainSet);
        stRegressors[i].setFilter(remove);

        trainSet.setClassIndex(labelIndices[i]);
        debug("Building model " + (i + 1) + "/" + numLabels);
        stRegressors[i].buildClassifier(trainSet);
    }
}
From source file:nl.uva.sne.commons.ClusterUtils.java
private static FilteredClassifier buildModel(int[] indicesToRemove, int classIndex, Instances trainDataset,
        Classifier cl) throws Exception {
    FilteredClassifier model = new FilteredClassifier();
    model.setClassifier(AbstractClassifier.makeCopy(cl));

    Remove remove = new Remove();
    remove.setAttributeIndicesArray(indicesToRemove);
    remove.setInputFormat(trainDataset);
    remove.setInvertSelection(false);
    model.setFilter(remove);

    trainDataset.setClassIndex(classIndex);
    model.buildClassifier(trainDataset);

    // int foldHash = trainDataset.toString().hashCode();
    // String modelKey = createKey(indicesToRemove, foldHash);
    // existingModels.put(modelKey, model);
    return model;
}
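A hypothetical call site for buildModel (the file name, removed index, and base classifier are assumptions; buildModel is private, so this assumes code inside ClusterUtils itself): drop an ID column at index 0, use the last attribute as the class, and let the FilteredClassifier apply the filter automatically at prediction time.

// inside nl.uva.sne.commons.ClusterUtils
Instances data = new weka.core.converters.ConverterUtils.DataSource("train.arff").getDataSet();
int[] indicesToRemove = { 0 }; // e.g. an ID column
FilteredClassifier model = buildModel(indicesToRemove, data.numAttributes() - 1, data,
        new weka.classifiers.trees.J48());
// the Remove filter is applied internally by classifyInstance
double pred = model.classifyInstance(data.instance(0));
System.out.println("Predicted class: " + data.classAttribute().value((int) pred));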