List of usage examples for the weka.core Instance method value
public double value(Attribute att);
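Both this overload and the int-index overload value(int attIndex), which most of the listings below call, return the internal double representation of an attribute's value: the stored number for a numeric attribute, or the index of the current value for a nominal attribute. A minimal sketch of the two recurring access patterns; the class name, the data set, and the indices are hypothetical placeholders:

import weka.core.Instance;
import weka.core.Instances;

public class InstanceValueSketch {
    // "data" is a hypothetical Instances object with a numeric attribute at index 0
    // and a binary nominal label attribute at index lblIdx.
    static void show(Instances data, int lblIdx) {
        Instance inst = data.instance(0);

        // Numeric attribute: value(...) returns the stored double directly.
        double numeric = inst.value(data.attribute(0)); // value(Attribute) overload
        double sameNumeric = inst.value(0);             // value(int) overload

        // Nominal attribute: value(...) returns the index of the current value,
        // which is mapped back to its string form ("0" or "1") via Attribute.value(int).
        String label = inst.attribute(lblIdx).value((int) inst.value(lblIdx));

        System.out.println(numeric + " " + sameNumeric + " " + label);
    }
}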
From source file:mulan.classifier.lazy.BRkNN.java
License:Open Source License
/**
 * Calculates the confidences of the labels, based on the neighboring instances.
 *
 * @param neighbours the list of nearest neighboring instances
 * @param distances the distances of the neighbors
 * @return the confidences of the labels
 */
private double[] getConfidences(Instances neighbours, double[] distances) {
    double total = 0, weight;
    double neighborLabels = 0;
    double[] confidences = new double[numLabels];

    // Set up a correction to the estimator
    for (int i = 0; i < numLabels; i++) {
        confidences[i] = 1.0 / Math.max(1, train.numInstances());
    }
    total = (double) numLabels / Math.max(1, train.numInstances());

    for (int i = 0; i < neighbours.numInstances(); i++) {
        // Collect class counts
        Instance current = neighbours.instance(i);
        distances[i] = distances[i] * distances[i];
        distances[i] = Math.sqrt(distances[i] / (train.numAttributes() - numLabels));
        switch (distanceWeighting) {
        case WEIGHT_INVERSE:
            weight = 1.0 / (distances[i] + 0.001); // to avoid division by zero
            break;
        case WEIGHT_SIMILARITY:
            weight = 1.0 - distances[i];
            break;
        default: // WEIGHT_NONE
            weight = 1.0;
            break;
        }
        weight *= current.weight();

        for (int j = 0; j < numLabels; j++) {
            double value = Double.parseDouble(
                    current.attribute(labelIndices[j]).value((int) current.value(labelIndices[j])));
            if (Utils.eq(value, 1.0)) {
                confidences[j] += weight;
                neighborLabels += weight;
            }
        }
        total += weight;
    }

    avgPredictedLabels = (int) Math.round(neighborLabels / total);
    // Normalise distribution
    if (total > 0) {
        Utils.normalize(confidences, total);
    }
    return confidences;
}
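A side note on the lookup used above: for a nominal attribute on an instance that has its dataset reference set, the chained call attribute(idx).value((int) instance.value(idx)) returns the same string as Weka's built-in Instance.stringValue(idx), provided the value is not missing. A minimal sketch; the helper name and parameters are hypothetical, not part of mulan:

// Hypothetical helper illustrating the equivalence of the two lookups.
static boolean isLabelRelevant(weka.core.Instance inst, int lblIdx) {
    // Chained lookup used throughout the examples on this page:
    String viaIndexLookup = inst.attribute(lblIdx).value((int) inst.value(lblIdx));
    // Built-in shortcut, equivalent for non-missing nominal values:
    String viaStringValue = inst.stringValue(lblIdx);
    assert viaIndexLookup.equals(viaStringValue);
    return viaStringValue.equals("1");
}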
From source file:mulan.classifier.lazy.IBLR_ML.java
License:Open Source License
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception {
    double[] conf_corrected = new double[numLabels];
    double[] confidences = new double[numLabels];

    Instances knn = new Instances(lnn.kNearestNeighbours(instance, numOfNeighbors));

    /*
     * Get the label confidence vector.
     */
    for (int i = 0; i < numLabels; i++) {
        // compute sum of counts for each label in KNN
        double count_for_label_i = 0;
        for (int k = 0; k < numOfNeighbors; k++) {
            double value = Double.parseDouble(
                    train.attribute(labelIndices[i]).value((int) knn.instance(k).value(labelIndices[i])));
            if (Utils.eq(value, 1.0)) {
                count_for_label_i++;
            }
        }
        confidences[i] = count_for_label_i / numOfNeighbors;
    }

    double[] attvalue = new double[numLabels + 1];

    if (addFeatures == true) {
        attvalue = new double[instance.numAttributes() + 1];
        // Copy the original features
        for (int m = 0; m < featureIndices.length; m++) {
            attvalue[m] = instance.value(featureIndices[m]);
        }
        // Copy the label confidences as additional features
        for (int m = 0; m < confidences.length; m++) {
            attvalue[train.numAttributes() - numLabels + m] = confidences[m];
        }
    } else {
        // Copy the label confidences as additional features
        for (int m = 0; m < confidences.length; m++) {
            attvalue[m] = confidences[m];
        }
    }

    // Add the class labels and finish the new training data
    for (int j = 0; j < numLabels; j++) {
        attvalue[attvalue.length - 1] = instance.value(train.numAttributes() - numLabels + j);
        Instance newInst = DataUtils.createInstance(instance, 1, attvalue);
        conf_corrected[j] = classifier[j].distributionForInstance(newInst)[1];
    }

    MultiLabelOutput mlo = new MultiLabelOutput(conf_corrected, 0.5);
    return mlo;
}
From source file:mulan.classifier.meta.ConstrainedKMeans.java
License:Open Source License
/**
 * Updates the minimum and maximum values for all the attributes
 * based on a new instance.
 *
 * @param instance the new instance
 */
private void updateMinMax(Instance instance) {
    for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
        if (!instance.isMissing(j)) {
            if (Double.isNaN(m_Min[j])) {
                m_Min[j] = instance.value(j);
                m_Max[j] = instance.value(j);
            } else {
                if (instance.value(j) < m_Min[j]) {
                    m_Min[j] = instance.value(j);
                } else {
                    if (instance.value(j) > m_Max[j]) {
                        m_Max[j] = instance.value(j);
                    }
                }
            }
        }
    }
}
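The same per-attribute scan can be written against plain Weka structures when no clusterer state is available. A minimal standalone sketch of the min/max pass, assuming a hypothetical Instances object data with numeric attributes:

// Hypothetical standalone variant of updateMinMax over a whole data set.
double[] min = new double[data.numAttributes()];
double[] max = new double[data.numAttributes()];
java.util.Arrays.fill(min, Double.NaN);
java.util.Arrays.fill(max, Double.NaN);
for (int i = 0; i < data.numInstances(); i++) {
    Instance inst = data.instance(i);
    for (int j = 0; j < inst.numAttributes(); j++) {
        if (inst.isMissing(j)) {
            continue; // missing values do not contribute to the range
        }
        double v = inst.value(j);
        if (Double.isNaN(min[j]) || v < min[j]) {
            min[j] = v;
        }
        if (Double.isNaN(max[j]) || v > max[j]) {
            max[j] = v;
        }
    }
}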
From source file:mulan.classifier.meta.thresholding.MetaLabeler.java
License:Open Source License
private int countTrueLabels(Instance instance) {
    int numTrueLabels = 0;
    for (int i = 0; i < numLabels; i++) {
        int labelIndice = labelIndices[i];
        if (instance.dataset().attribute(labelIndice).value((int) instance.value(labelIndice)).equals("1")) {
            numTrueLabels++;
        }
    }
    return numTrueLabels;
}
From source file:mulan.classifier.meta.thresholding.OneThreshold.java
License:Open Source License
/**
 * Evaluates the performance of the learner on a data set according to a
 * bipartition measure for a range of thresholds.
 *
 * @param data the test data to evaluate different thresholds
 * @param measure the evaluation is based on this parameter
 * @param min the minimum threshold
 * @param step the step to increase the threshold from min to max
 * @param max the maximum threshold
 * @return the optimal threshold
 * @throws Exception
 */
private double computeThreshold(MultiLabelLearner learner, MultiLabelInstances data,
        BipartitionMeasureBase measure, double min, double step, double max) throws Exception {
    int numOfThresholds = (int) Math.rint((max - min) / step + 1);
    double[] performance = new double[numOfThresholds];
    BipartitionMeasureBase[] measureForThreshold = new BipartitionMeasureBase[numOfThresholds];
    for (int i = 0; i < numOfThresholds; i++) {
        measureForThreshold[i] = (BipartitionMeasureBase) measure.makeCopy();
        measureForThreshold[i].reset();
    }
    boolean[] thresholdHasProblem = new boolean[numOfThresholds];
    Arrays.fill(thresholdHasProblem, false);
    for (int j = 0; j < data.getNumInstances(); j++) {
        Instance instance = data.getDataSet().instance(j);
        if (data.hasMissingLabels(instance)) {
            continue;
        }
        MultiLabelOutput mlo = learner.makePrediction(instance);
        boolean[] trueLabels = new boolean[numLabels];
        for (int counter = 0; counter < numLabels; counter++) {
            int classIdx = labelIndices[counter];
            String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
            trueLabels[counter] = classValue.equals("1");
        }
        double[] confidences = mlo.getConfidences();
        int counter = 0;
        double currentThreshold = min;
        while (currentThreshold <= max) {
            boolean[] bipartition = new boolean[numLabels];
            for (int k = 0; k < numLabels; k++) {
                if (confidences[k] >= currentThreshold) {
                    bipartition[k] = true;
                }
            }
            try {
                MultiLabelOutput temp = new MultiLabelOutput(bipartition);
                measureForThreshold[counter].update(temp, trueLabels);
            } catch (MulanRuntimeException e) {
                thresholdHasProblem[counter] = true;
            }
            currentThreshold += step;
            counter++;
        }
    }
    for (int i = 0; i < numOfThresholds; i++) {
        if (!thresholdHasProblem[i]) {
            performance[i] = Math.abs(measure.getIdealValue() - measureForThreshold[i].getValue());
        } else {
            performance[i] = Double.MAX_VALUE;
        }
    }
    return min + Utils.minIndex(performance) * step;
}
From source file:mulan.classifier.meta.thresholding.RCut.java
License:Open Source License
/**
 * Evaluates the performance of different threshold values.
 *
 * @param data the test data to evaluate different thresholds
 * @param measure the evaluation is based on this parameter
 * @return the sum of differences from the optimal value of the measure for
 *         each instance and threshold
 * @throws Exception
 */
private double[] computeThreshold(MultiLabelLearner learner, MultiLabelInstances data,
        BipartitionMeasureBase measure) throws Exception {
    double[] diff = new double[numLabels + 1];
    measure.reset();
    for (int j = 0; j < data.getNumInstances(); j++) {
        Instance instance = data.getDataSet().instance(j);
        if (data.hasMissingLabels(instance)) {
            continue;
        }
        MultiLabelOutput mlo = learner.makePrediction(instance);
        boolean[] trueLabels = new boolean[numLabels];
        for (int counter = 0; counter < numLabels; counter++) {
            int classIdx = labelIndices[counter];
            String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
            trueLabels[counter] = classValue.equals("1");
        }
        int[] ranking = mlo.getRanking();
        for (int threshold = 0; threshold <= numLabels; threshold++) {
            boolean[] bipartition = new boolean[numLabels];
            for (int k = 0; k < numLabels; k++) {
                if (ranking[k] <= threshold) {
                    bipartition[k] = true;
                }
            }
            // this doesn't work with label-based measures
            // diff[threshold] += Math.abs(measure.getIdealValue() - measure.updateBipartition(bipartition, trueLabels));
        }
    }
    return diff;
}
From source file:mulan.classifier.neural.BPMLL.java
License:Open Source License
public MultiLabelOutput makePredictionInternal(Instance instance) throws InvalidDataException {
    Instance inputInstance = null;
    if (nominalToBinaryFilter != null) {
        try {
            nominalToBinaryFilter.input(instance);
            inputInstance = nominalToBinaryFilter.output();
            inputInstance.setDataset(null);
        } catch (Exception ex) {
            throw new InvalidDataException("The input instance for prediction is invalid. "
                    + "Instance is not consistent with the data the model was built for.");
        }
    } else {
        inputInstance = DataUtils.createInstance(instance, instance.weight(), instance.toDoubleArray());
    }

    int numAttributes = inputInstance.numAttributes();
    if (numAttributes < model.getNetInputSize()) {
        throw new InvalidDataException("Input instance do not have enough attributes "
                + "to be processed by the model. Instance is not consistent with the data the model was built for.");
    }

    // if instance has more attributes than model input, we assume that true outputs
    // are there, so we remove them
    List<Integer> someLabelIndices = new ArrayList<Integer>();
    boolean labelsAreThere = false;
    if (numAttributes > model.getNetInputSize()) {
        for (int index : this.labelIndices) {
            someLabelIndices.add(index);
        }
        labelsAreThere = true;
    }

    if (normalizeAttributes) {
        normalizer.normalize(inputInstance);
    }

    int inputDim = model.getNetInputSize();
    double[] inputPattern = new double[inputDim];
    int indexCounter = 0;
    for (int attrIndex = 0; attrIndex < numAttributes; attrIndex++) {
        if (labelsAreThere && someLabelIndices.contains(attrIndex)) {
            continue;
        }
        inputPattern[indexCounter] = inputInstance.value(attrIndex);
        indexCounter++;
    }

    double[] labelConfidences = model.feedForward(inputPattern);
    double threshold = thresholdF.computeThreshold(labelConfidences);
    boolean[] labelPredictions = new boolean[numLabels];
    Arrays.fill(labelPredictions, false);
    for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) {
        if (labelConfidences[labelIndex] > threshold) {
            labelPredictions[labelIndex] = true;
        }
        // translate from bipolar output to binary
        labelConfidences[labelIndex] = (labelConfidences[labelIndex] + 1) / 2;
    }
    MultiLabelOutput mlo = new MultiLabelOutput(labelPredictions, labelConfidences);
    return mlo;
}
From source file:mulan.classifier.neural.DataPair.java
License:Open Source License
/**
 * Creates a {@link DataPair} representation for each {@link Instance} contained in a
 * {@link MultiLabelInstances} data set. The {@link DataPair} is a lightweight representation
 * of instance values (as double values), which is useful when iterating over the data and its
 * values.
 *
 * @param mlDataSet the {@link MultiLabelInstances} whose content has to be
 *                  converted to a list of {@link DataPair}
 * @param bipolarOutput indicates whether output values should be converted
 *                      to bipolar values, or left intact as binary
 * @return the list of data pairs
 */
// TODO: this method should be in some kind of "data utils".
public static List<DataPair> createDataPairs(MultiLabelInstances mlDataSet, boolean bipolarOutput) {
    Instances data = mlDataSet.getDataSet();
    int[] featureIndices = mlDataSet.getFeatureIndices();
    int[] labelIndices = mlDataSet.getLabelIndices();
    int numFeatures = featureIndices.length;
    int numLabels = mlDataSet.getNumLabels();

    int numInstances = data.numInstances();
    List<DataPair> dataPairs = new ArrayList<DataPair>(numInstances);
    for (int index = 0; index < numInstances; index++) {
        Instance instance = data.instance(index);
        double[] input = new double[numFeatures];
        for (int i = 0; i < numFeatures; i++) {
            int featureIndex = featureIndices[i];
            Attribute featureAttr = instance.attribute(featureIndex);
            // if the attribute is binary, parse the string value ... it is expected to be '0' or '1'
            if (featureAttr.isNominal() && featureAttr.numValues() == 2) {
                input[i] = Double.parseDouble(instance.stringValue(featureIndex));
            }
            // else:
            // a) the attribute is nominal with multiple values; use indexes, as nominal values
            //    do not have to be numbers in general ... this fall-back should be a rare case
            // b) the attribute is numeric
            else {
                input[i] = instance.value(featureIndex);
            }
        }

        if (mlDataSet.hasMissingLabels(instance)) {
            continue;
        }

        double[] output = new double[numLabels];
        for (int i = 0; i < numLabels; i++) {
            output[i] = Double
                    .parseDouble(data.attribute(labelIndices[i]).value((int) instance.value(labelIndices[i])));
            if (bipolarOutput && output[i] == 0) {
                output[i] = -1;
            }
        }

        dataPairs.add(new DataPair(input, output));
    }
    return dataPairs;
}
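A hedged sketch of consuming the conversion above. It assumes DataPair exposes getInput() and getOutput() accessors for the arrays passed to its constructor; that is an assumption about the mulan API, not something shown in this listing:

// Hypothetical consumer of createDataPairs; getInput()/getOutput() are assumed accessors.
List<DataPair> pairs = DataPair.createDataPairs(mlDataSet, true);
for (DataPair pair : pairs) {
    double[] features = pair.getInput(); // feature values extracted via Instance.value(...)
    double[] labels = pair.getOutput();  // bipolar label values (-1 or 1)
    // feed "features" to a network and compare its output against "labels"
}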
From source file:mulan.classifier.neural.MMPLearner.java
License:Open Source License
private double[] getFeatureVector(Instance inputInstance) {
    if (convertNomToBin && nomToBinFilter != null) {
        try {
            nomToBinFilter.input(inputInstance);
            inputInstance = nomToBinFilter.output();
            inputInstance.setDataset(null);
        } catch (Exception ex) {
            throw new InvalidDataException("The input instance for prediction is invalid. "
                    + "Instance is not consistent with the data the model was built for.");
        }
    }

    // check that the number of attributes is at least equal to the model input size
    int numAttributes = inputInstance.numAttributes();
    int modelInputDim = perceptrons.get(0).getWeights().length - 1;
    if (numAttributes < modelInputDim) {
        throw new InvalidDataException("Input instance do not have enough attributes "
                + "to be processed by the model. Instance is not consistent with the data the model was built for.");
    }

    // if instance has more attributes than model input, we assume that true outputs
    // are there, so we remove them
    List<Integer> labelIndices = new ArrayList<Integer>();
    boolean labelsAreThere = false;
    if (numAttributes > modelInputDim) {
        for (int index : this.labelIndices) {
            labelIndices.add(index);
        }
        labelsAreThere = true;
    }

    double[] inputPattern = new double[modelInputDim];
    int indexCounter = 0;
    for (int attrIndex = 0; attrIndex < numAttributes; attrIndex++) {
        if (labelsAreThere && labelIndices.contains(attrIndex)) {
            continue;
        }
        inputPattern[indexCounter] = inputInstance.value(attrIndex);
        indexCounter++;
    }
    return inputPattern;
}
From source file:mulan.classifier.neural.NormalizationFilter.java
License:Open Source License
/**
 * Performs a normalization of numerical attributes on a given instance.
 * The instance must conform to the format of the instances data the {@link NormalizationFilter}
 * was initialized with.
 *
 * @param instance the instance to be normalized
 */
public void normalize(Instance instance) {
    Set<Integer> normScope = attStats.keySet();
    for (Integer attIndex : normScope) {
        double[] stats = attStats.get(attIndex);
        double attMin = stats[0];
        double attMax = stats[1];
        double value = instance.value(attIndex);

        if (attMin == attMax) {
            instance.setValue(attIndex, minValue);
        } else {
            instance.setValue(attIndex,
                    (((value - stats[0]) / (stats[1] - stats[0])) * (maxValue - minValue)) + minValue);
        }
    }
}