List of usage examples for the weka.core Instance method value
public double value(Attribute att);
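Both this overload and the int-index overload value(int attIndex), which most of the listings below call, return the internal double representation of an attribute's value: the stored number for a numeric attribute, or the index of the current value for a nominal attribute. A minimal sketch of the two recurring access patterns; the class name, the data set, and the indices are hypothetical placeholders:

import weka.core.Instance;
import weka.core.Instances;

public class InstanceValueSketch {
    // "data" is a hypothetical Instances object with a numeric attribute at index 0
    // and a binary nominal label attribute at index lblIdx.
    static void show(Instances data, int lblIdx) {
        Instance inst = data.instance(0);

        // Numeric attribute: value(...) returns the stored double directly.
        double numeric = inst.value(data.attribute(0)); // value(Attribute) overload
        double sameNumeric = inst.value(0);             // value(int) overload

        // Nominal attribute: value(...) returns the index of the current value,
        // which is mapped back to its string form ("0" or "1") via Attribute.value(int).
        String label = inst.attribute(lblIdx).value((int) inst.value(lblIdx));

        System.out.println(numeric + " " + sameNumeric + " " + label);
    }
}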
From source file:mulan.classifier.lazy.BRkNN.java
License:Open Source License
/**
 * Calculates the confidences of the labels, based on the neighboring instances.
 *
 * @param neighbours the list of nearest neighboring instances
 * @param distances the distances of the neighbors
 * @return the confidences of the labels
 */
private double[] getConfidences(Instances neighbours, double[] distances) {
    double total = 0, weight;
    double neighborLabels = 0;
    double[] confidences = new double[numLabels];

    // Set up a correction to the estimator
    for (int i = 0; i < numLabels; i++) {
        confidences[i] = 1.0 / Math.max(1, train.numInstances());
    }
    total = (double) numLabels / Math.max(1, train.numInstances());

    for (int i = 0; i < neighbours.numInstances(); i++) {
        // Collect class counts
        Instance current = neighbours.instance(i);
        distances[i] = distances[i] * distances[i];
        distances[i] = Math.sqrt(distances[i] / (train.numAttributes() - numLabels));
        switch (distanceWeighting) {
        case WEIGHT_INVERSE:
            weight = 1.0 / (distances[i] + 0.001); // to avoid division by zero
            break;
        case WEIGHT_SIMILARITY:
            weight = 1.0 - distances[i];
            break;
        default: // WEIGHT_NONE
            weight = 1.0;
            break;
        }
        weight *= current.weight();

        for (int j = 0; j < numLabels; j++) {
            double value = Double.parseDouble(
                    current.attribute(labelIndices[j]).value((int) current.value(labelIndices[j])));
            if (Utils.eq(value, 1.0)) {
                confidences[j] += weight;
                neighborLabels += weight;
            }
        }
        total += weight;
    }

    avgPredictedLabels = (int) Math.round(neighborLabels / total);
    // Normalise distribution
    if (total > 0) {
        Utils.normalize(confidences, total);
    }
    return confidences;
}
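A side note on the lookup used above: for a nominal attribute on an instance that has its dataset reference set, the chained call attribute(idx).value((int) instance.value(idx)) returns the same string as Weka's built-in Instance.stringValue(idx), provided the value is not missing. A minimal sketch; the helper name and parameters are hypothetical, not part of mulan:

// Hypothetical helper illustrating the equivalence of the two lookups.
static boolean isLabelRelevant(weka.core.Instance inst, int lblIdx) {
    // Chained lookup used throughout the examples on this page:
    String viaIndexLookup = inst.attribute(lblIdx).value((int) inst.value(lblIdx));
    // Built-in shortcut, equivalent for non-missing nominal values:
    String viaStringValue = inst.stringValue(lblIdx);
    assert viaIndexLookup.equals(viaStringValue);
    return viaStringValue.equals("1");
}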
From source file:mulan.classifier.lazy.IBLR_ML.java
License:Open Source License
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception {
    double[] conf_corrected = new double[numLabels];
    double[] confidences = new double[numLabels];

    Instances knn = new Instances(lnn.kNearestNeighbours(instance, numOfNeighbors));

    /*
     * Get the label confidence vector.
     */
    for (int i = 0; i < numLabels; i++) {
        // compute sum of counts for each label in KNN
        double count_for_label_i = 0;
        for (int k = 0; k < numOfNeighbors; k++) {
            double value = Double.parseDouble(
                    train.attribute(labelIndices[i]).value((int) knn.instance(k).value(labelIndices[i])));
            if (Utils.eq(value, 1.0)) {
                count_for_label_i++;
            }
        }
        confidences[i] = count_for_label_i / numOfNeighbors;
    }

    double[] attvalue = new double[numLabels + 1];

    if (addFeatures == true) {
        attvalue = new double[instance.numAttributes() + 1];
        // Copy the original features
        for (int m = 0; m < featureIndices.length; m++) {
            attvalue[m] = instance.value(featureIndices[m]);
        }
        // Copy the label confidences as additional features
        for (int m = 0; m < confidences.length; m++) {
            attvalue[train.numAttributes() - numLabels + m] = confidences[m];
        }
    } else {
        // Copy the label confidences as additional features
        for (int m = 0; m < confidences.length; m++) {
            attvalue[m] = confidences[m];
        }
    }

    // Add the class labels and finish the new training data
    for (int j = 0; j < numLabels; j++) {
        attvalue[attvalue.length - 1] = instance.value(train.numAttributes() - numLabels + j);
        Instance newInst = DataUtils.createInstance(instance, 1, attvalue);
        conf_corrected[j] = classifier[j].distributionForInstance(newInst)[1];
    }

    MultiLabelOutput mlo = new MultiLabelOutput(conf_corrected, 0.5);
    return mlo;
}
From source file:mulan.classifier.meta.ConstrainedKMeans.java
License:Open Source License
/**
 * Updates the minimum and maximum values for all the attributes
 * based on a new instance.
 *
 * @param instance the new instance
 */
private void updateMinMax(Instance instance) {
    for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
        if (!instance.isMissing(j)) {
            if (Double.isNaN(m_Min[j])) {
                m_Min[j] = instance.value(j);
                m_Max[j] = instance.value(j);
            } else {
                if (instance.value(j) < m_Min[j]) {
                    m_Min[j] = instance.value(j);
                } else {
                    if (instance.value(j) > m_Max[j]) {
                        m_Max[j] = instance.value(j);
                    }
                }
            }
        }
    }
}
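The same per-attribute scan can be written against plain Weka structures when no clusterer state is available. A minimal standalone sketch of the min/max pass, assuming a hypothetical Instances object data with numeric attributes:

// Hypothetical standalone variant of updateMinMax over a whole data set.
double[] min = new double[data.numAttributes()];
double[] max = new double[data.numAttributes()];
java.util.Arrays.fill(min, Double.NaN);
java.util.Arrays.fill(max, Double.NaN);
for (int i = 0; i < data.numInstances(); i++) {
    Instance inst = data.instance(i);
    for (int j = 0; j < inst.numAttributes(); j++) {
        if (inst.isMissing(j)) {
            continue; // missing values do not contribute to the range
        }
        double v = inst.value(j);
        if (Double.isNaN(min[j]) || v < min[j]) {
            min[j] = v;
        }
        if (Double.isNaN(max[j]) || v > max[j]) {
            max[j] = v;
        }
    }
}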
From source file:mulan.classifier.meta.thresholding.MetaLabeler.java
License:Open Source License
private int countTrueLabels(Instance instance) {
    int numTrueLabels = 0;
    for (int i = 0; i < numLabels; i++) {
        int labelIndice = labelIndices[i];
        if (instance.dataset().attribute(labelIndice).value((int) instance.value(labelIndice)).equals("1")) {
            numTrueLabels++;
        }
    }
    return numTrueLabels;
}
From source file:mulan.classifier.meta.thresholding.OneThreshold.java
License:Open Source License
/**
 * Evaluates the performance of the learner on a data set according to a
 * bipartition measure for a range of thresholds.
 *
 * @param data the test data to evaluate different thresholds
 * @param measure the evaluation is based on this parameter
 * @param min the minimum threshold
 * @param step the step to increase the threshold from min to max
 * @param max the maximum threshold
 * @return the optimal threshold
 * @throws Exception
 */
private double computeThreshold(MultiLabelLearner learner, MultiLabelInstances data,
        BipartitionMeasureBase measure, double min, double step, double max) throws Exception {
    int numOfThresholds = (int) Math.rint((max - min) / step + 1);
    double[] performance = new double[numOfThresholds];
    BipartitionMeasureBase[] measureForThreshold = new BipartitionMeasureBase[numOfThresholds];
    for (int i = 0; i < numOfThresholds; i++) {
        measureForThreshold[i] = (BipartitionMeasureBase) measure.makeCopy();
        measureForThreshold[i].reset();
    }
    boolean[] thresholdHasProblem = new boolean[numOfThresholds];
    Arrays.fill(thresholdHasProblem, false);
    for (int j = 0; j < data.getNumInstances(); j++) {
        Instance instance = data.getDataSet().instance(j);
        if (data.hasMissingLabels(instance)) {
            continue;
        }
        MultiLabelOutput mlo = learner.makePrediction(instance);
        boolean[] trueLabels = new boolean[numLabels];
        for (int counter = 0; counter < numLabels; counter++) {
            int classIdx = labelIndices[counter];
            String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
            trueLabels[counter] = classValue.equals("1");
        }
        double[] confidences = mlo.getConfidences();
        int counter = 0;
        double currentThreshold = min;
        while (currentThreshold <= max) {
            boolean[] bipartition = new boolean[numLabels];
            for (int k = 0; k < numLabels; k++) {
                if (confidences[k] >= currentThreshold) {
                    bipartition[k] = true;
                }
            }
            try {
                MultiLabelOutput temp = new MultiLabelOutput(bipartition);
                measureForThreshold[counter].update(temp, trueLabels);
            } catch (MulanRuntimeException e) {
                thresholdHasProblem[counter] = true;
            }
            currentThreshold += step;
            counter++;
        }
    }
    for (int i = 0; i < numOfThresholds; i++) {
        if (!thresholdHasProblem[i]) {
            performance[i] = Math.abs(measure.getIdealValue() - measureForThreshold[i].getValue());
        } else {
            performance[i] = Double.MAX_VALUE;
        }
    }
    return min + Utils.minIndex(performance) * step;
}
From source file:mulan.classifier.meta.thresholding.RCut.java
License:Open Source License
/**
 * Evaluates the performance of different threshold values.
 *
 * @param data the test data to evaluate different thresholds
 * @param measure the evaluation is based on this parameter
 * @return the sum of differences from the optimal value of the measure for
 *         each instance and threshold
 * @throws Exception
 */
private double[] computeThreshold(MultiLabelLearner learner, MultiLabelInstances data,
        BipartitionMeasureBase measure) throws Exception {
    double[] diff = new double[numLabels + 1];
    measure.reset();
    for (int j = 0; j < data.getNumInstances(); j++) {
        Instance instance = data.getDataSet().instance(j);
        if (data.hasMissingLabels(instance)) {
            continue;
        }
        MultiLabelOutput mlo = learner.makePrediction(instance);
        boolean[] trueLabels = new boolean[numLabels];
        for (int counter = 0; counter < numLabels; counter++) {
            int classIdx = labelIndices[counter];
            String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
            trueLabels[counter] = classValue.equals("1");
        }
        int[] ranking = mlo.getRanking();
        for (int threshold = 0; threshold <= numLabels; threshold++) {
            boolean[] bipartition = new boolean[numLabels];
            for (int k = 0; k < numLabels; k++) {
                if (ranking[k] <= threshold) {
                    bipartition[k] = true;
                }
            }
            // this doesn't work with label-based measures
            // diff[threshold] += Math.abs(measure.getIdealValue() - measure.updateBipartition(bipartition, trueLabels));
        }
    }
    return diff;
}
From source file:mulan.classifier.neural.BPMLL.java
License:Open Source License
public MultiLabelOutput makePredictionInternal(Instance instance) throws InvalidDataException {
    Instance inputInstance = null;
    if (nominalToBinaryFilter != null) {
        try {
            nominalToBinaryFilter.input(instance);
            inputInstance = nominalToBinaryFilter.output();
            inputInstance.setDataset(null);
        } catch (Exception ex) {
            throw new InvalidDataException("The input instance for prediction is invalid. "
                    + "Instance is not consistent with the data the model was built for.");
        }
    } else {
        inputInstance = DataUtils.createInstance(instance, instance.weight(), instance.toDoubleArray());
    }

    int numAttributes = inputInstance.numAttributes();
    if (numAttributes < model.getNetInputSize()) {
        throw new InvalidDataException("Input instance do not have enough attributes "
                + "to be processed by the model. Instance is not consistent with the data the model was built for.");
    }

    // if instance has more attributes than model input, we assume that true outputs
    // are there, so we remove them
    List<Integer> someLabelIndices = new ArrayList<Integer>();
    boolean labelsAreThere = false;
    if (numAttributes > model.getNetInputSize()) {
        for (int index : this.labelIndices) {
            someLabelIndices.add(index);
        }
        labelsAreThere = true;
    }

    if (normalizeAttributes) {
        normalizer.normalize(inputInstance);
    }

    int inputDim = model.getNetInputSize();
    double[] inputPattern = new double[inputDim];
    int indexCounter = 0;
    for (int attrIndex = 0; attrIndex < numAttributes; attrIndex++) {
        if (labelsAreThere && someLabelIndices.contains(attrIndex)) {
            continue;
        }
        inputPattern[indexCounter] = inputInstance.value(attrIndex);
        indexCounter++;
    }

    double[] labelConfidences = model.feedForward(inputPattern);
    double threshold = thresholdF.computeThreshold(labelConfidences);
    boolean[] labelPredictions = new boolean[numLabels];
    Arrays.fill(labelPredictions, false);
    for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) {
        if (labelConfidences[labelIndex] > threshold) {
            labelPredictions[labelIndex] = true;
        }
        // translate from bipolar output to binary
        labelConfidences[labelIndex] = (labelConfidences[labelIndex] + 1) / 2;
    }
    MultiLabelOutput mlo = new MultiLabelOutput(labelPredictions, labelConfidences);
    return mlo;
}
From source file:mulan.classifier.neural.DataPair.java
License:Open Source License
/**
 * Creates a {@link DataPair} representation for each {@link Instance} contained in a
 * {@link MultiLabelInstances} data set. The {@link DataPair} is a lightweight representation
 * of instance values (as double values), which is useful when iterating over the data and its
 * values.
 *
 * @param mlDataSet the {@link MultiLabelInstances} whose content has to be
 *                  converted to a list of {@link DataPair}
 * @param bipolarOutput indicates whether output values should be converted
 *                      to bipolar values, or left intact as binary
 * @return the list of data pairs
 */
// TODO: this method should be in some kind of "data utils".
public static List<DataPair> createDataPairs(MultiLabelInstances mlDataSet, boolean bipolarOutput) {
    Instances data = mlDataSet.getDataSet();
    int[] featureIndices = mlDataSet.getFeatureIndices();
    int[] labelIndices = mlDataSet.getLabelIndices();
    int numFeatures = featureIndices.length;
    int numLabels = mlDataSet.getNumLabels();

    int numInstances = data.numInstances();
    List<DataPair> dataPairs = new ArrayList<DataPair>(numInstances);
    for (int index = 0; index < numInstances; index++) {
        Instance instance = data.instance(index);
        double[] input = new double[numFeatures];
        for (int i = 0; i < numFeatures; i++) {
            int featureIndex = featureIndices[i];
            Attribute featureAttr = instance.attribute(featureIndex);
            // if the attribute is binary, parse the string value ... it is expected to be '0' or '1'
            if (featureAttr.isNominal() && featureAttr.numValues() == 2) {
                input[i] = Double.parseDouble(instance.stringValue(featureIndex));
            }
            // else:
            // a) the attribute is nominal with multiple values; use indexes, as nominal values
            //    do not have to be numbers in general ... this fall-back should be a rare case
            // b) the attribute is numeric
            else {
                input[i] = instance.value(featureIndex);
            }
        }

        if (mlDataSet.hasMissingLabels(instance)) {
            continue;
        }

        double[] output = new double[numLabels];
        for (int i = 0; i < numLabels; i++) {
            output[i] = Double
                    .parseDouble(data.attribute(labelIndices[i]).value((int) instance.value(labelIndices[i])));
            if (bipolarOutput && output[i] == 0) {
                output[i] = -1;
            }
        }

        dataPairs.add(new DataPair(input, output));
    }
    return dataPairs;
}
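A hedged sketch of consuming the conversion above. It assumes DataPair exposes getInput() and getOutput() accessors for the arrays passed to its constructor; that is an assumption about the mulan API, not something shown in this listing:

// Hypothetical consumer of createDataPairs; getInput()/getOutput() are assumed accessors.
List<DataPair> pairs = DataPair.createDataPairs(mlDataSet, true);
for (DataPair pair : pairs) {
    double[] features = pair.getInput(); // feature values extracted via Instance.value(...)
    double[] labels = pair.getOutput();  // bipolar label values (-1 or 1)
    // feed "features" to a network and compare its output against "labels"
}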
From source file:mulan.classifier.neural.MMPLearner.java
License:Open Source License
private double[] getFeatureVector(Instance inputInstance) {
    if (convertNomToBin && nomToBinFilter != null) {
        try {
            nomToBinFilter.input(inputInstance);
            inputInstance = nomToBinFilter.output();
            inputInstance.setDataset(null);
        } catch (Exception ex) {
            throw new InvalidDataException("The input instance for prediction is invalid. "
                    + "Instance is not consistent with the data the model was built for.");
        }
    }

    // check that the number of attributes is at least equal to the model input size
    int numAttributes = inputInstance.numAttributes();
    int modelInputDim = perceptrons.get(0).getWeights().length - 1;
    if (numAttributes < modelInputDim) {
        throw new InvalidDataException("Input instance do not have enough attributes "
                + "to be processed by the model. Instance is not consistent with the data the model was built for.");
    }

    // if instance has more attributes than model input, we assume that true outputs
    // are there, so we remove them
    List<Integer> labelIndices = new ArrayList<Integer>();
    boolean labelsAreThere = false;
    if (numAttributes > modelInputDim) {
        for (int index : this.labelIndices) {
            labelIndices.add(index);
        }
        labelsAreThere = true;
    }

    double[] inputPattern = new double[modelInputDim];
    int indexCounter = 0;
    for (int attrIndex = 0; attrIndex < numAttributes; attrIndex++) {
        if (labelsAreThere && labelIndices.contains(attrIndex)) {
            continue;
        }
        inputPattern[indexCounter] = inputInstance.value(attrIndex);
        indexCounter++;
    }
    return inputPattern;
}
From source file:mulan.classifier.neural.NormalizationFilter.java
License:Open Source License
/**
 * Performs a normalization of numerical attributes on a given instance.
 * The instance must conform to the format of the instances data the {@link NormalizationFilter}
 * was initialized with.
 *
 * @param instance the instance to be normalized
 */
public void normalize(Instance instance) {
    Set<Integer> normScope = attStats.keySet();
    for (Integer attIndex : normScope) {
        double[] stats = attStats.get(attIndex);
        double attMin = stats[0];
        double attMax = stats[1];
        double value = instance.value(attIndex);

        if (attMin == attMax) {
            instance.setValue(attIndex, minValue);
        } else {
            instance.setValue(attIndex,
                    (((value - stats[0]) / (stats[1] - stats[0])) * (maxValue - minValue)) + minValue);
        }
    }
}