List of usage examples for weka.core Instance setDataset
public void setDataset(Instances instances);
From source file:mulan.classifier.neural.MMPLearner.java
License:Open Source License
private double[] getFeatureVector(Instance inputInstance) { if (convertNomToBin && nomToBinFilter != null) { try {//from w ww. ja v a 2 s .c o m nomToBinFilter.input(inputInstance); inputInstance = nomToBinFilter.output(); inputInstance.setDataset(null); } catch (Exception ex) { throw new InvalidDataException("The input instance for prediction is invalid. " + "Instance is not consistent with the data the model was built for."); } } // check if number in attributes is at least equal to model input int numAttributes = inputInstance.numAttributes(); int modelInputDim = perceptrons.get(0).getWeights().length - 1; if (numAttributes < modelInputDim) { throw new InvalidDataException("Input instance do not have enough attributes " + "to be processed by the model. Instance is not consistent with the data the model was built for."); } // if instance has more attributes than model input, we assume that true outputs // are there, so we remove them List<Integer> labelIndices = new ArrayList<Integer>(); boolean labelsAreThere = false; if (numAttributes > modelInputDim) { for (int index : this.labelIndices) { labelIndices.add(index); } labelsAreThere = true; } double[] inputPattern = new double[modelInputDim]; int indexCounter = 0; for (int attrIndex = 0; attrIndex < numAttributes; attrIndex++) { if (labelsAreThere && labelIndices.contains(attrIndex)) { continue; } inputPattern[indexCounter] = inputInstance.value(attrIndex); indexCounter++; } return inputPattern; }
From source file:mulan.classifier.transformation.CalibratedLabelRanking.java
License:Open Source License
/** * This method does a prediction for an instance with the values of label missing * @param instance//from ww w.j av a2s .c o m * @return prediction * @throws java.lang.Exception */ public MultiLabelOutput makePredictionStandard(Instance instance) throws Exception { boolean[] bipartition = new boolean[numLabels]; double[] confidences = new double[numLabels]; int[] voteLabel = new int[numLabels + 1]; //System.out.println("Instance:" + instance.toString()); // delete all labels and add a new atribute at the end Instance newInstance = RemoveAllLabels.transformInstance(instance, labelIndices); newInstance.insertAttributeAt(newInstance.numAttributes()); //initialize the array voteLabel Arrays.fill(voteLabel, 0); int counter = 0; for (int label1 = 0; label1 < numLabels - 1; label1++) { for (int label2 = label1 + 1; label2 < numLabels; label2++) { if (!nodata[counter]) { double distribution[] = new double[2]; try { newInstance.setDataset(metaDataTest[counter]); distribution = oneVsOneModels[counter].distributionForInstance(newInstance); } catch (Exception e) { System.out.println(e); return null; } int maxIndex = (distribution[0] > distribution[1]) ? 0 : 1; // Ensure correct predictions both for class values {0,1} and {1,0} Attribute classAttribute = metaDataTest[counter].classAttribute(); if (classAttribute.value(maxIndex).equals("1")) { voteLabel[label1]++; } else { voteLabel[label2]++; } } counter++; } } int voteVirtual = 0; MultiLabelOutput virtualMLO = virtualLabelModels.makePrediction(instance); boolean[] virtualBipartition = virtualMLO.getBipartition(); for (int i = 0; i < numLabels; i++) { if (virtualBipartition[i]) { voteLabel[i]++; } else { voteVirtual++; } } for (int i = 0; i < numLabels; i++) { if (voteLabel[i] >= voteVirtual) { bipartition[i] = true; } else { bipartition[i] = false; } confidences[i] = 1.0 * voteLabel[i] / numLabels; } MultiLabelOutput mlo = new MultiLabelOutput(bipartition, confidences); return mlo; }
From source file:mulan.classifier.transformation.CalibratedLabelRanking.java
License:Open Source License
/**
 * This method does a prediction for an instance with the values of label missing
 * according to QWeighted algorithm for Multilabel Classification (QCMLPP2), which is
 * described in:
 * Loza Mencia, E., Park, S.-H., and Fuernkranz, J. (2009)
 * Efficient voting prediction for pairwise multilabel classification.
 * In Proceedings of 17th European Symposium on Artificial
 * Neural Networks (ESANN 2009), Bruges (Belgium), April 2009
 *
 * This method reduces the number of classifier evaluations and guarantees the same
 * Multilabel Output as ordinary Voting. But: the estimated confidences are only
 * approximated. Therefore, ranking-based performances are worse than ordinary voting.
 *
 * @param instance the instance to predict (label attributes are ignored)
 * @return prediction
 * @throws java.lang.Exception if a base model fails
 */
public MultiLabelOutput makePredictionQW(Instance instance) throws Exception {
    // per-label vote counts; the extra slot in played/playedMatrix is the virtual label
    int[] voteLabel = new int[numLabels];
    int[] played = new int[numLabels + 1]; // games played per player (labels + virtual label)
    int[][] playedMatrix = new int[numLabels + 1][numLabels + 1]; // 1 = pairing already evaluated
    int[] sortarr = new int[numLabels + 1];
    double[] limits = new double[numLabels]; // "lost games" per label = played - votes won
    boolean[] bipartition = new boolean[numLabels];
    double[] confidences = new double[numLabels];
    int voteVirtual = 0;
    double limitVirtual = 0.0;
    boolean allEqualClassesFound = false;
    // delete all labels and add a new attribute at the end
    Instance newInstance = RemoveAllLabels.transformInstance(instance, labelIndices);
    newInstance.insertAttributeAt(newInstance.numAttributes());
    // initialize the array voteLabel
    Arrays.fill(voteLabel, 0);
    // evaluate all classifiers of the calibrated label beforehand:
    // #numLabels "label vs. virtual label" evaluations
    MultiLabelOutput virtualMLO = virtualLabelModels.makePrediction(instance);
    boolean[] virtualBipartition = virtualMLO.getBipartition();
    for (int i = 0; i < numLabels; i++) {
        if (virtualBipartition[i]) {
            voteLabel[i]++;
        } else {
            voteVirtual++;
        }
        played[i]++;
        playedMatrix[i][numLabels] = 1;
        playedMatrix[numLabels][i] = 1;
        limits[i] = played[i] - voteLabel[i];
    }
    limitVirtual = numLabels - voteVirtual;
    played[numLabels] = numLabels;
    // apply QWeighted iteratively to estimate all relevant labels until the
    // calibrated label is found
    boolean found = false;
    int pos = 0;
    int player1 = -1;
    int player2 = -1;
    while (!allEqualClassesFound && pos < numLabels) {
        while (!found) {
            // opponent selection process: pair best against second best w.r.t. to number of "lost games"
            // player1 = pick player with min(limits[player]) && player isn't ranked
            sortarr = Utils.sort(limits);
            player1 = sortarr[0];
            player2 = -1;
            int i = 1;
            // can we found unplayed matches of player1 ?
            if (played[player1] < numLabels) {
                // search for best opponent
                while (player2 == -1 && i < sortarr.length) {
                    // already played ??
                    if (playedMatrix[player1][sortarr[i]] == 0) {
                        player2 = sortarr[i];
                    }
                    i++;
                }
                // play found Pairing and update stats
                int modelIndex = getRRClassifierIndex(player1, player2);
                newInstance.setDataset(metaDataTest[modelIndex]);
                double[] distribution = oneVsOneModels[modelIndex].distributionForInstance(newInstance);
                int maxIndex = (distribution[0] > distribution[1]) ? 0 : 1;
                // Ensure correct predictions both for class values {0,1} and {1,0}
                Attribute classAttribute = metaDataTest[modelIndex].classAttribute();
                if (classAttribute.value(maxIndex).equals("1")) {
                    // winner is always the smaller label index of the pair
                    voteLabel[player1 > player2 ? player2 : player1]++;
                } else {
                    voteLabel[player1 > player2 ? player1 : player2]++;
                }
                // update stats
                played[player1]++;
                played[player2]++;
                playedMatrix[player1][player2] = 1;
                playedMatrix[player2][player1] = 1;
                limits[player1] = played[player1] - voteLabel[player1];
                limits[player2] = played[player2] - voteLabel[player2];
            }
            // full played, there are no opponents left
            else {
                found = true;
            }
        }
        // arrange already as relevant validated labels at the end of possible opponents
        limits[player1] = Double.MAX_VALUE;
        // check for possible labels, which can still gain greater or equal votes as the calibrated label
        allEqualClassesFound = true;
        for (int i = 0; i < numLabels; i++) {
            if (limits[i] <= limitVirtual) {
                allEqualClassesFound = false;
            }
        }
        // search for next relevant label
        found = false;
        pos++;
    }
    // Generate Multilabel Output
    for (int i = 0; i < numLabels; i++) {
        if (voteLabel[i] >= voteVirtual) {
            bipartition[i] = true;
        } else {
            bipartition[i] = false;
        }
        confidences[i] = 1.0 * voteLabel[i] / numLabels;
    }
    MultiLabelOutput mlo = new MultiLabelOutput(bipartition, confidences);
    return mlo;
}
From source file:mulan.classifier.transformation.IncludeLabelsClassifier.java
License:Open Source License
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception { double[] confidences = new double[numLabels]; boolean[] bipartition = new boolean[numLabels]; Instance newInstance = pt6Trans.transformInstance(instance); //calculate confidences //debug(instance.toString()); for (int i = 0; i < numLabels; i++) { newInstance.setDataset(transformed); newInstance.setValue(newInstance.numAttributes() - 2, instance.dataset().attribute(labelIndices[i]).name()); //debug(newInstance.toString()); double[] temp = baseClassifier.distributionForInstance(newInstance); //debug(temp.toString()); confidences[i] = temp[transformed.classAttribute().indexOfValue("1")]; //debug("" + confidences[i]); bipartition[i] = temp[transformed.classAttribute().indexOfValue("1")] >= temp[transformed .classAttribute().indexOfValue("0")] ? true : false; //debug("" + bipartition[i]); }/*w w w . j ava2 s. co m*/ MultiLabelOutput mlo = new MultiLabelOutput(bipartition, confidences); return mlo; }
From source file:mulan.classifier.transformation.MultiClassLearner.java
License:Open Source License
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception { //delete labels instance = RemoveAllLabels.transformInstance(instance, labelIndices); instance.setDataset(null); instance.insertAttributeAt(instance.numAttributes()); instance.setDataset(header);/*w ww. java2 s .c o m*/ double[] distribution = baseClassifier.distributionForInstance(instance); MultiLabelOutput mlo = new MultiLabelOutput(MultiLabelOutput.ranksFromValues(distribution)); return mlo; }
From source file:mulan.classifier.transformation.MultiLabelStacking.java
License:Open Source License
/**
 * Initializes all the parameters used in the meta-level.
 * Calculates the correlated labels if meta-level pruning is applied.
 *
 * @param dataSet the multi-label training data
 * @param metaClassifier the classifier prototype copied for each meta-level model
 * @param includeAttrs whether original attributes are kept in the meta-level data
 * @param metaPercentage fraction (0..1] of labels to keep per label when pruning
 * @param eval attribute evaluator for pruning; null selects the Phi-coefficient method
 * @throws Exception if Phi computation or attribute selection fails
 */
public void initializeMetaLevel(MultiLabelInstances dataSet, Classifier metaClassifier, boolean includeAttrs,
        double metaPercentage, ASEvaluation eval) throws Exception {
    this.metaClassifier = metaClassifier;
    metaLevelEnsemble = AbstractClassifier.makeCopies(metaClassifier, numLabels);
    metaLevelData = new Instances[numLabels];
    metaLevelFilteredEnsemble = new FilteredClassifier[numLabels];
    this.includeAttrs = includeAttrs;
    // calculate the number of correlated labels that corresponds to the
    // given percentage
    topkCorrelated = (int) Math.floor(metaPercentage * numLabels);
    if (topkCorrelated < 1) {
        debug("Too small percentage, selecting k=1");
        topkCorrelated = 1;
    }
    if (topkCorrelated < numLabels) { // pruning should be applied
        selectedAttributes = new int[numLabels][];
        if (eval == null) { // calculate the PhiCoefficient
            Statistics phi = new Statistics();
            phi.calculatePhi(dataSet);
            for (int i = 0; i < numLabels; i++) {
                selectedAttributes[i] = phi.topPhiCorrelatedLabels(i, topkCorrelated);
            }
        } else { // apply feature selection
            AttributeSelection attsel = new AttributeSelection();
            Ranker rankingMethod = new Ranker();
            rankingMethod.setNumToSelect(topkCorrelated);
            attsel.setEvaluator(eval);
            attsel.setSearch(rankingMethod);
            // create a dataset consisting of all the classes of each
            // instance plus the class we want to select attributes from
            for (int i = 0; i < numLabels; i++) {
                ArrayList<Attribute> attributes = new ArrayList<Attribute>();
                for (int j = 0; j < numLabels; j++) {
                    attributes.add(train.attribute(labelIndices[j]));
                }
                // a copy of label i serves as the class of the temporary dataset
                attributes.add(train.attribute(labelIndices[i]).copy("meta"));
                Instances iporesult = new Instances("Meta format", attributes, 0);
                iporesult.setClassIndex(numLabels);
                for (int k = 0; k < train.numInstances(); k++) {
                    // label values are nominal "0"/"1" strings; parse them to numerics
                    double[] values = new double[numLabels + 1];
                    for (int m = 0; m < numLabels; m++) {
                        values[m] = Double.parseDouble(train.attribute(labelIndices[m])
                                .value((int) train.instance(k).value(labelIndices[m])));
                    }
                    values[numLabels] = Double.parseDouble(train.attribute(labelIndices[i])
                            .value((int) train.instance(k).value(labelIndices[i])));
                    Instance metaInstance = DataUtils.createInstance(train.instance(k), 1, values);
                    metaInstance.setDataset(iporesult);
                    iporesult.add(metaInstance);
                }
                attsel.SelectAttributes(iporesult);
                selectedAttributes[i] = attsel.selectedAttributes();
                // free the temporary dataset's instances before the next label
                iporesult.delete();
            }
        }
    }
}
From source file:mulan.classifier.transformation.MultiLabelStacking.java
License:Open Source License
/**
 * Builds the ensemble of meta-level classifiers, one per label, each trained
 * on the base-level predictions (optionally together with the original
 * attributes) and wrapped in a Remove filter that prunes uncorrelated labels.
 *
 * @throws Exception if filter setup or classifier training fails
 */
public void buildMetaLevel() throws Exception {
    debug("Building the ensemle of the meta level classifiers");
    for (int i = 0; i < numLabels; i++) {
        // creating meta-level data
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        if (includeAttrs) { // create an ArrayList with numAttributes size
            for (int j = 0; j < train.numAttributes(); j++) {
                attributes.add(train.attribute(j));
            }
        } else { // create a list with numLabels size
            for (int j = 0; j < numLabels; j++) {
                attributes.add(train.attribute(labelIndices[j]));
            }
        }
        // a copy of label i is appended as the class attribute of the meta data
        attributes.add(train.attribute(labelIndices[i]).copy("meta"));
        metaLevelData[i] = new Instances("Meta format", attributes, 0);
        metaLevelData[i].setClassIndex(metaLevelData[i].numAttributes() - 1);
        // add the meta instances
        for (int l = 0; l < train.numInstances(); l++) {
            double[] values = new double[metaLevelData[i].numAttributes()];
            if (includeAttrs) {
                // Copy the original features
                for (int m = 0; m < featureIndices.length; m++) {
                    values[m] = train.instance(l).value(featureIndices[m]);
                }
                // Copy the label confidences as additional features
                for (int m = 0; m < numLabels; m++) {
                    values[train.numAttributes() - numLabels + m] = baseLevelPredictions[l][m];
                }
            } else {
                for (int m = 0; m < numLabels; m++) {
                    values[m] = baseLevelPredictions[l][m];
                }
            }
            // the last slot carries the numeric value of label i for this instance
            values[values.length - 1] = Double.parseDouble(
                    train.attribute(labelIndices[i]).value((int) train.instance(l).value(labelIndices[i])));
            Instance metaInstance = DataUtils.createInstance(train.instance(l), 1, values);
            metaInstance.setDataset(metaLevelData[i]);
            // binarize the class value against 0.5
            if (values[values.length - 1] > 0.5) {
                metaInstance.setClassValue("1");
            } else {
                metaInstance.setClassValue("0");
            }
            metaLevelData[i].add(metaInstance);
        }
        // We utilize a filtered classifier to prune uncorrelated labels
        metaLevelFilteredEnsemble[i] = new FilteredClassifier();
        metaLevelFilteredEnsemble[i].setClassifier(metaLevelEnsemble[i]);
        Remove remove = new Remove();
        if (topkCorrelated < numLabels) {
            remove.setAttributeIndicesArray(selectedAttributes[i]);
        } else {
            remove.setAttributeIndices("first-last");
        }
        // keep only the selected attributes (invert = remove everything else)
        remove.setInvertSelection(true);
        remove.setInputFormat(metaLevelData[i]);
        metaLevelFilteredEnsemble[i].setFilter(remove);
        debug("Building classifier for meta training set" + i);
        metaLevelFilteredEnsemble[i].buildClassifier(metaLevelData[i]);
        // free the training instances once the classifier is built
        metaLevelData[i].delete();
    }
}
From source file:mulan.classifier.transformation.MultiLabelStacking.java
License:Open Source License
/**
 * Two-level stacking prediction: first obtains per-label confidences from the
 * base-level models (or from k-nearest neighbours when the base classifier is
 * IBk), then feeds those confidences into the meta-level ensemble to produce
 * the final bipartition and confidences.
 *
 * @param instance the input instance
 * @return the meta-level prediction, or null if a meta-level model fails
 * @throws Exception if a base-level model or the neighbour search fails
 */
@Override
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception {
    boolean[] bipartition = new boolean[numLabels];
    // the confidences given as final output
    double[] metaconfidences = new double[numLabels];
    // the confidences produced by the first level ensemble of classifiers
    double[] confidences = new double[numLabels];
    if (!(baseClassifier instanceof IBk)) {
        // getting the confidences for each label
        for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) {
            Instance newInstance = BinaryRelevanceTransformation.transformInstance(instance, labelIndices,
                    labelIndices[labelIndex]);
            newInstance.setDataset(baseLevelData[labelIndex]);
            double distribution[] = new double[2];
            distribution = baseLevelEnsemble[labelIndex].distributionForInstance(newInstance);
            // Ensure correct predictions both for class values {0,1} and
            // {1,0}
            Attribute classAttribute = baseLevelData[labelIndex].classAttribute();
            // The confidence of the label being equal to 1
            confidences[labelIndex] = distribution[classAttribute.indexOfValue("1")];
        }
    } else {
        // old way using brknn
        // MultiLabelOutput prediction = brknn.makePrediction(instance);
        // confidences = prediction.getConfidences();
        // new way: query the shared nearest-neighbour search once
        int numOfNeighbors = ((IBk) baseClassifier).getKNN();
        Instances knn = new Instances(lnn.kNearestNeighbours(instance, numOfNeighbors));
        /*
         * Get the label confidence vector.
         */
        for (int i = 0; i < numLabels; i++) {
            // compute sum of counts for each label in KNN
            double count_for_label_i = 0;
            for (int k = 0; k < numOfNeighbors; k++) {
                double value = Double.parseDouble(
                        train.attribute(labelIndices[i]).value((int) knn.instance(k).value(labelIndices[i])));
                if (Utils.eq(value, 1.0)) {
                    count_for_label_i++;
                }
            }
            // confidence = fraction of neighbours having the label set
            confidences[i] = count_for_label_i / numOfNeighbors;
        }
    }
    /* creation of the meta-instance with the appropriate values */
    double[] values = new double[numLabels + 1];
    if (includeAttrs) {
        values = new double[instance.numAttributes() + 1];
        // Copy the original features
        for (int m = 0; m < featureIndices.length; m++) {
            values[m] = instance.value(featureIndices[m]);
        }
        // Copy the label confidences as additional features
        for (int m = 0; m < confidences.length; m++) {
            values[instance.numAttributes() - numLabels + m] = confidences[m];
        }
    } else {
        for (int m = 0; m < confidences.length; m++) {
            values[m] = confidences[m];
        }
    }
    /* application of the meta-level ensemble to the metaInstance */
    for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) {
        // the class slot is unknown at prediction time; use 0 as a placeholder
        values[values.length - 1] = 0;
        Instance newmetaInstance = DataUtils.createInstance(instance, 1, values);
        double distribution[] = new double[2];
        try {
            distribution = metaLevelFilteredEnsemble[labelIndex].distributionForInstance(newmetaInstance);
        } catch (Exception e) {
            System.out.println(e);
            return null;
        }
        int maxIndex = (distribution[0] > distribution[1]) ? 0 : 1;
        // Ensure correct predictions both for class values {0,1} and {1,0}
        Attribute classAttribute = metaLevelData[labelIndex].classAttribute();
        bipartition[labelIndex] = (classAttribute.value(maxIndex).equals("1")) ? true : false;
        // The confidence of the label being equal to 1
        metaconfidences[labelIndex] = distribution[classAttribute.indexOfValue("1")];
    }
    MultiLabelOutput mlo = new MultiLabelOutput(bipartition, metaconfidences);
    return mlo;
}
From source file:mulan.classifier.transformation.MultiLabelStacking.java
License:Open Source License
/** * Attaches an index attribute at the beginning of each instance * * @param original/*w w w . j a v a 2 s . c om*/ * @return */ protected Instances attachIndexes(Instances original) { ArrayList<Attribute> attributes = new ArrayList<Attribute>(original.numAttributes() + 1); for (int i = 0; i < original.numAttributes(); i++) { attributes.add(original.attribute(i)); } // Add attribute for holding the index at the beginning. attributes.add(0, new Attribute("Index")); Instances transformed = new Instances("Meta format", attributes, 0); for (int i = 0; i < original.numInstances(); i++) { Instance newInstance; newInstance = (Instance) original.instance(i).copy(); newInstance.setDataset(null); newInstance.insertAttributeAt(0); newInstance.setValue(0, i); transformed.add(newInstance); } transformed.setClassIndex(original.classIndex() + 1); return transformed; }
From source file:mulan.classifier.transformation.TwoStageClassifierChainArchitecture.java
License:Open Source License
/**
 * This method does a prediction for an instance with the values of label
 * missing according to Two Stage Pruned Classifier Chain (TSPCCA), which is
 * described in : Madjarov, Gj., Gjorgjevikj, D. and Dzeroski, S. Two stage
 * architecture for multi-label learning Pattern Recognition, vol. 45, pp.
 * 1019-1034, 2012
 *
 * @param instance the instance used
 * @return prediction the prediction made
 * @throws java.lang.Exception Potential exception thrown. To be handled in an upper level.
 */
private MultiLabelOutput makePredictionTSCCV(Instance instance) throws Exception {
    boolean[] bipartition = new boolean[numLabels];
    double[] confidences = new double[numLabels];
    int[] voteLabel = new int[numLabels + 1];
    int[] noVoteLabel = new int[numLabels + 1]; // pairs that abstained for this label
    int[] voteFromVirtualModels = new int[numLabels];
    double[] confidenceFromVirtualModels = new double[numLabels];
    // initialize the array voteLabel
    Arrays.fill(voteLabel, 0);
    Arrays.fill(noVoteLabel, 0);
    Arrays.fill(voteFromVirtualModels, 0);
    Arrays.fill(confidenceFromVirtualModels, 0.0);
    // first stage: the virtual (calibration) label models
    int voteVirtual = 0;
    MultiLabelOutput virtualMLO = virtualLabelModels.makePrediction(instance);
    boolean[] virtualBipartition = virtualMLO.getBipartition();
    // number of classifiers of the first layer that forward the instance to the second layer
    int forwards = 0;
    for (int i = 0; i < numLabels; i++) {
        if (virtualMLO.hasConfidences()) {
            confidenceFromVirtualModels[i] = virtualMLO.getConfidences()[i];
        }
        if (virtualBipartition[i]) {
            voteLabel[i]++;
            voteFromVirtualModels[i]++;
        } else {
            voteVirtual++;
        }
        // only labels whose first-stage confidence exceeds the threshold reach stage two
        if (confidenceFromVirtualModels[i] > threshold) {
            forwards++;
        }
    }
    Instance newInstanceFirstStage;
    // add predictions from the virtual models, preserving sparse/dense representation
    if (instance instanceof SparseInstance) {
        newInstanceFirstStage = modifySparseInstance(instance, virtualMLO.getConfidences());
    } else {
        newInstanceFirstStage = modifyDenseInstance(instance, virtualMLO.getConfidences());
    }
    // delete all labels and add a new attribute at the end
    Instance newInstance = RemoveAllLabels.transformInstance(newInstanceFirstStage, labelIndices);
    newInstance.insertAttributeAt(newInstance.numAttributes());
    // second stage: pairwise models, consulted only when both labels passed the threshold
    int counter = 0;
    for (int label1 = 0; label1 < numLabels - 1; label1++) {
        for (int label2 = label1 + 1; label2 < numLabels; label2++) {
            if (!nodata[counter]) {
                if (confidenceFromVirtualModels[label1] > threshold
                        && confidenceFromVirtualModels[label2] > threshold) {
                    double distribution[];
                    try {
                        newInstance.setDataset(metaDataTest[counter]);
                        distribution = oneVsOneModels[counter].distributionForInstance(newInstance);
                    } catch (Exception e) {
                        System.out.println(e);
                        return null;
                    }
                    int maxIndex = (distribution[0] > distribution[1]) ? 0 : 1;
                    // Ensure correct predictions both for class values {0,1} and {1,0}
                    Attribute classAttribute = metaDataTest[counter].classAttribute();
                    if (classAttribute.value(maxIndex).equals("1")) {
                        voteLabel[label1]++;
                    } else {
                        voteLabel[label2]++;
                    }
                } else if (confidenceFromVirtualModels[label1] > threshold) {
                    // only one side was forwarded; it wins the pairing by default
                    voteLabel[label1]++;
                } else if (confidenceFromVirtualModels[label2] > threshold) {
                    voteLabel[label2]++;
                } else {
                    // neither side was forwarded; the pairing abstains
                    noVoteLabel[label1]++;
                    noVoteLabel[label2]++;
                }
            }
            counter++;
        }
    }
    avgForwards += forwards;
    for (int i = 0; i < numLabels; i++) {
        if (voteLabel[i] >= voteVirtual) {
            bipartition[i] = true;
            // normalize by the pairings that actually voted
            confidences[i] = (1.0 * voteLabel[i]) / (numLabels - noVoteLabel[i]);
        } else {
            bipartition[i] = false;
            confidences[i] = 1.0 * confidenceFromVirtualModels[i] / numLabels;
            //confidences[i]=confidenceFromVirtualModels[i];
        }
    }
    MultiLabelOutput mlo = new MultiLabelOutput(bipartition, confidences);
    return mlo;
}