Example usage for weka.core Instance value

List of usage examples for weka.core Instance value

Introduction

On this page you can find example usages of the weka.core Instance value method.

Prototype

public double value(Attribute att);

Source Link

Document

Returns an instance's attribute value in internal format.

Usage

From source file:mulan.classifier.lazy.BRkNN.java

License:Open Source License

/**
 * Derives one confidence score per label from the labels carried by the
 * given nearest neighbours.
 * <p>
 * Every confidence starts from a small prior of {@code 1 / max(1, #training instances)}.
 * Each neighbour then adds its weight to the confidence of every label whose
 * nominal value parses to {@code 1.0}. The weight depends on
 * {@code distanceWeighting} and is multiplied by the neighbour's own instance
 * weight. The result is finally passed through {@code Utils.normalize} with
 * the accumulated total.
 * <p>
 * Side effects: the entries of {@code distances} are overwritten with their
 * rescaled values, and the field {@code avgPredictedLabels} is updated.
 *
 * @param neighbours
 *            the list of nearest neighboring instances
 * @param distances
 *            the distances of the neighbors; modified in place
 * @return the confidences of the labels
 */
private double[] getConfidences(Instances neighbours, double[] distances) {
    final int trainingSize = Math.max(1, train.numInstances());
    final int featureCount = train.numAttributes() - numLabels;

    // Prior that acts as a correction to the estimator
    final double[] confidences = new double[numLabels];
    for (int label = 0; label < numLabels; label++) {
        confidences[label] = 1.0 / trainingSize;
    }
    double total = (double) numLabels / trainingSize;
    double neighborLabels = 0;

    for (int n = 0; n < neighbours.numInstances(); n++) {
        final Instance neighbour = neighbours.instance(n);

        // Rescale the distance by the number of non-label attributes
        final double squared = distances[n] * distances[n];
        distances[n] = Math.sqrt(squared / featureCount);

        double weight;
        switch (distanceWeighting) {
        case WEIGHT_INVERSE:
            // the small constant avoids division by zero
            weight = 1.0 / (distances[n] + 0.001);
            break;
        case WEIGHT_SIMILARITY:
            weight = 1.0 - distances[n];
            break;
        default: // WEIGHT_NONE
            weight = 1.0;
            break;
        }
        weight *= neighbour.weight();
        total += weight;

        // Credit the weight to every label this neighbour carries
        for (int label = 0; label < numLabels; label++) {
            final int attrIndex = labelIndices[label];
            final String nominal = neighbour.attribute(attrIndex).value((int) neighbour.value(attrIndex));
            final double labelValue = Double.parseDouble(nominal);
            if (Utils.eq(labelValue, 1.0)) {
                confidences[label] += weight;
                neighborLabels += weight;
            }
        }
    }

    avgPredictedLabels = (int) Math.round(neighborLabels / total);

    // Normalise distribution
    if (total > 0) {
        Utils.normalize(confidences, total);
    }
    return confidences;
}

From source file:mulan.classifier.lazy.IBLR_ML.java

License:Open Source License

protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception {

    double[] conf_corrected = new double[numLabels];
    double[] confidences = new double[numLabels];

    Instances knn = new Instances(lnn.kNearestNeighbours(instance, numOfNeighbors));

    /*/* w  ww  . ja v a2 s  .c o m*/
     * Get the label confidence vector.
     */
    for (int i = 0; i < numLabels; i++) {
        // compute sum of counts for each label in KNN
        double count_for_label_i = 0;
        for (int k = 0; k < numOfNeighbors; k++) {
            double value = Double.parseDouble(
                    train.attribute(labelIndices[i]).value((int) knn.instance(k).value(labelIndices[i])));
            if (Utils.eq(value, 1.0)) {
                count_for_label_i++;
            }
        }

        confidences[i] = count_for_label_i / numOfNeighbors;

    }

    double[] attvalue = new double[numLabels + 1];

    if (addFeatures == true) {
        attvalue = new double[instance.numAttributes() + 1];

        // Copy the original features
        for (int m = 0; m < featureIndices.length; m++) {
            attvalue[m] = instance.value(featureIndices[m]);
        }
        // Copy the label confidences as additional features
        for (int m = 0; m < confidences.length; m++) {
            attvalue[train.numAttributes() - numLabels + m] = confidences[m];
        }
    } else {
        // Copy the label confidences as additional features
        for (int m = 0; m < confidences.length; m++) {
            attvalue[m] = confidences[m];
        }
    }

    // Add the class labels and finish the new training data
    for (int j = 0; j < numLabels; j++) {
        attvalue[attvalue.length - 1] = instance.value(train.numAttributes() - numLabels + j);
        Instance newInst = DataUtils.createInstance(instance, 1, attvalue);
        conf_corrected[j] = classifier[j].distributionForInstance(newInst)[1];
    }

    MultiLabelOutput mlo = new MultiLabelOutput(conf_corrected, 0.5);
    return mlo;
}

From source file:mulan.classifier.meta.ConstrainedKMeans.java

License:Open Source License

/**
 * Updates the minimum and maximum values for all the attributes
 * based on a new instance./*from   ww  w.j  a va  2 s  . c om*/
 *
 * @param instance the new instance
 */
private void updateMinMax(Instance instance) {
    for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
        if (!instance.isMissing(j)) {
            if (Double.isNaN(m_Min[j])) {
                m_Min[j] = instance.value(j);
                m_Max[j] = instance.value(j);
            } else {
                if (instance.value(j) < m_Min[j]) {
                    m_Min[j] = instance.value(j);
                } else {
                    if (instance.value(j) > m_Max[j]) {
                        m_Max[j] = instance.value(j);
                    }
                }
            }
        }
    }
}

From source file:mulan.classifier.meta.thresholding.MetaLabeler.java

License:Open Source License

/**
 * Counts how many of the label attributes of the given instance hold the
 * nominal value {@code "1"}.
 *
 * @param instance the instance whose labels are inspected
 * @return the number of labels whose nominal value equals {@code "1"}
 */
private int countTrueLabels(Instance instance) {
    int trueCount = 0;
    for (int pos = 0; pos < numLabels; pos++) {
        final int attrIndex = labelIndices[pos];
        // Map the internal value index back to its nominal string
        final String nominal = instance.dataset().attribute(attrIndex).value((int) instance.value(attrIndex));
        if (nominal.equals("1")) {
            trueCount += 1;
        }
    }
    return trueCount;
}

From source file:mulan.classifier.meta.thresholding.OneThreshold.java

License:Open Source License

/**
 * Evaluates the performance of the learner on a data set according to a
 * bipartition measure for a range of thresholds.
 * <p>
 * The candidate thresholds are {@code min + i * step} for
 * {@code i = 0 .. numOfThresholds - 1}, limited to values not above
 * {@code max}. Each threshold is derived from its index rather than by
 * repeated addition, so rounding error cannot skip the last threshold or
 * run past the per-threshold arrays. This also matches how the returned
 * value is reconstructed from the best index.
 * <p>
 * Instances with missing labels are skipped. A threshold for which the
 * measure update throws a {@code MulanRuntimeException} is excluded by
 * giving it the worst possible score.
 *
 * @param learner the learner whose predictions are thresholded
 * @param data the test data to evaluate different thresholds
 * @param measure the evaluation is based on this parameter
 * @param min the minimum threshold
 * @param step the step to increase threshold from min to max
 * @param max the maximum threshold
 * @return the optimal threshold
 * @throws Exception if a prediction or measure copy fails
 */
private double computeThreshold(MultiLabelLearner learner, MultiLabelInstances data,
        BipartitionMeasureBase measure, double min, double step, double max) throws Exception {
    int numOfThresholds = (int) Math.rint((max - min) / step + 1);
    double[] performance = new double[numOfThresholds];
    BipartitionMeasureBase[] measureForThreshold = new BipartitionMeasureBase[numOfThresholds];
    for (int i = 0; i < numOfThresholds; i++) {
        measureForThreshold[i] = (BipartitionMeasureBase) measure.makeCopy();
        measureForThreshold[i].reset();
    }

    // Entries default to false
    boolean[] thresholdHasProblem = new boolean[numOfThresholds];

    // Tolerance for the upper bound, so that min + i * step landing a few
    // ulps above max still counts as max
    final double slack = Math.abs(step) * 1e-9;

    for (int j = 0; j < data.getNumInstances(); j++) {
        Instance instance = data.getDataSet().instance(j);

        if (data.hasMissingLabels(instance)) {
            continue;
        }

        MultiLabelOutput mlo = learner.makePrediction(instance);

        // A label is true when its nominal value is "1"
        boolean[] trueLabels = new boolean[numLabels];
        for (int counter = 0; counter < numLabels; counter++) {
            int classIdx = labelIndices[counter];
            String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
            trueLabels[counter] = classValue.equals("1");
        }

        double[] confidences = mlo.getConfidences();

        for (int counter = 0; counter < numOfThresholds; counter++) {
            double currentThreshold = min + counter * step;
            if (currentThreshold > max + slack) {
                break;
            }
            boolean[] bipartition = new boolean[numLabels];
            for (int k = 0; k < numLabels; k++) {
                if (confidences[k] >= currentThreshold) {
                    bipartition[k] = true;
                }
            }
            try {
                MultiLabelOutput temp = new MultiLabelOutput(bipartition);
                measureForThreshold[counter].update(temp, trueLabels);
            } catch (MulanRuntimeException e) {
                // Remember the failure so this threshold is never selected
                thresholdHasProblem[counter] = true;
            }
        }
    }

    // Score each threshold by its distance from the measure's ideal value
    for (int i = 0; i < numOfThresholds; i++) {
        if (!thresholdHasProblem[i]) {
            performance[i] = Math.abs(measure.getIdealValue() - measureForThreshold[i].getValue());
        } else {
            performance[i] = Double.MAX_VALUE;
        }
    }

    return min + Utils.minIndex(performance) * step;
}

From source file:mulan.classifier.meta.thresholding.RCut.java

License:Open Source License

/**
 * Evaluates the performance of different threshold values
 *
 * @param data the test data to evaluate different thresholds
 * @param measure the evaluation is based on this parameter
 * @return the sum of differences from the optimal value of the measure for
 * each instance and threshold/*  w w w .  j a  v  a  2  s  .  c  o  m*/
 * @throws Exception
 */
private double[] computeThreshold(MultiLabelLearner learner, MultiLabelInstances data,
        BipartitionMeasureBase measure) throws Exception {
    double[] diff = new double[numLabels + 1];
    measure.reset();
    for (int j = 0; j < data.getNumInstances(); j++) {
        Instance instance = data.getDataSet().instance(j);

        if (data.hasMissingLabels(instance)) {
            continue;
        }

        MultiLabelOutput mlo = learner.makePrediction(instance);

        boolean[] trueLabels = new boolean[numLabels];
        for (int counter = 0; counter < numLabels; counter++) {
            int classIdx = labelIndices[counter];
            String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
            trueLabels[counter] = classValue.equals("1");
        }

        int[] ranking = mlo.getRanking();
        for (int threshold = 0; threshold <= numLabels; threshold++) {
            boolean[] bipartition = new boolean[numLabels];
            for (int k = 0; k < numLabels; k++) {
                if (ranking[k] <= threshold) {
                    bipartition[k] = true;
                }
            }
            // this doesn't work with label-based measures
            //                diff[threshold] += Math.abs(measure.getIdealValue() - measure.updateBipartition(bipartition, trueLabels));
        }
    }
    return diff;
}

From source file:mulan.classifier.neural.BPMLL.java

License:Open Source License

/**
 * Produces a prediction for one instance by feeding its attribute values
 * through the neural network model.
 * <p>
 * The instance is first passed through the nominal-to-binary filter when
 * one is configured, otherwise it is copied. Label attributes are left out
 * of the input pattern when the instance has more attributes than the
 * network has inputs. A label is predicted when the network output exceeds
 * the threshold computed by {@code thresholdF}. The outputs are then
 * rescaled with {@code (x + 1) / 2} before being reported as confidences.
 *
 * @param instance the instance to predict
 * @return the bipartition and confidences for all labels
 * @throws InvalidDataException if the instance cannot be filtered or has
 *             fewer attributes than the network has inputs
 */
public MultiLabelOutput makePredictionInternal(Instance instance) throws InvalidDataException {

    // Work on a filtered instance or a private copy, never on the caller's instance
    Instance prepared;
    if (nominalToBinaryFilter == null) {
        prepared = DataUtils.createInstance(instance, instance.weight(), instance.toDoubleArray());
    } else {
        try {
            nominalToBinaryFilter.input(instance);
            prepared = nominalToBinaryFilter.output();
            prepared.setDataset(null);
        } catch (Exception ex) {
            throw new InvalidDataException("The input instance for prediction is invalid. "
                    + "Instance is not consistent with the data the model was built for.");
        }
    }

    final int attributeCount = prepared.numAttributes();
    final int netInputSize = model.getNetInputSize();
    if (attributeCount < netInputSize) {
        throw new InvalidDataException("Input instance do not have enough attributes "
                + "to be processed by the model. Instance is not consistent with the data the model was built for.");
    }

    // Surplus attributes are assumed to be the true outputs and are skipped below
    final boolean hasLabelColumns = attributeCount > netInputSize;
    final List<Integer> labelColumns = new ArrayList<Integer>();
    if (hasLabelColumns) {
        for (int labelColumn : this.labelIndices) {
            labelColumns.add(labelColumn);
        }
    }

    if (normalizeAttributes) {
        normalizer.normalize(prepared);
    }

    // Collect the non-label attribute values as the network input
    final double[] inputPattern = new double[netInputSize];
    int next = 0;
    for (int attr = 0; attr < attributeCount; attr++) {
        final boolean isLabel = hasLabelColumns && labelColumns.contains(attr);
        if (!isLabel) {
            inputPattern[next] = prepared.value(attr);
            next++;
        }
    }

    final double[] labelConfidences = model.feedForward(inputPattern);
    final double threshold = thresholdF.computeThreshold(labelConfidences);
    final boolean[] labelPredictions = new boolean[numLabels];

    for (int label = 0; label < numLabels; label++) {
        final double rawOutput = labelConfidences[label];
        labelPredictions[label] = rawOutput > threshold;
        // translate from bipolar output to binary
        labelConfidences[label] = (rawOutput + 1) / 2;
    }

    return new MultiLabelOutput(labelPredictions, labelConfidences);
}

From source file:mulan.classifier.neural.DataPair.java

License:Open Source License

/**
 * Converts every {@link Instance} of a {@link MultiLabelInstances} data set
 * into a {@link DataPair}, a light-weight representation holding the feature
 * values and the label values as plain {@code double} arrays.
 * <p>
 * Instances for which the data set reports missing labels are left out of
 * the result.
 *
 * @param mlDataSet the {@link MultiLabelInstances} which content has to be
 *          converted to list of {@link DataPair}
 * @param bipolarOutput indicates whether output values should be converted
 *          to bipolar values, or left intact as binary
 * @return the list of data pairs
 */
// TODO: this method should be in some kind of "data utils".
public static List<DataPair> createDataPairs(MultiLabelInstances mlDataSet, boolean bipolarOutput) {

    final Instances data = mlDataSet.getDataSet();
    final int[] featureIndices = mlDataSet.getFeatureIndices();
    final int[] labelIndices = mlDataSet.getLabelIndices();
    final int labelCount = mlDataSet.getNumLabels();
    final int instanceCount = data.numInstances();

    final List<DataPair> pairs = new ArrayList<DataPair>(instanceCount);
    for (int row = 0; row < instanceCount; row++) {
        final Instance instance = data.instance(row);

        final double[] input = new double[featureIndices.length];
        for (int f = 0; f < input.length; f++) {
            final int attrIndex = featureIndices[f];
            final Attribute attr = instance.attribute(attrIndex);
            // A two-valued nominal attribute is expected to hold '0' or '1',
            // so its string value is parsed. Everything else (numeric, or
            // nominal with more values) falls back to the internal value.
            final boolean binaryNominal = attr.isNominal() && attr.numValues() == 2;
            input[f] = binaryNominal
                    ? Double.parseDouble(instance.stringValue(attrIndex))
                    : instance.value(attrIndex);
        }

        if (!mlDataSet.hasMissingLabels(instance)) {
            final double[] output = new double[labelCount];
            for (int l = 0; l < labelCount; l++) {
                final String nominal = data.attribute(labelIndices[l]).value((int) instance.value(labelIndices[l]));
                final double target = Double.parseDouble(nominal);
                // Bipolar encoding maps 0 to -1 and leaves other values untouched
                output[l] = (bipolarOutput && target == 0) ? -1 : target;
            }
            pairs.add(new DataPair(input, output));
        }
    }

    return pairs;
}

From source file:mulan.classifier.neural.MMPLearner.java

License:Open Source License

/**
 * Extracts the model input vector from an instance.
 * <p>
 * The instance is passed through the nominal-to-binary filter when
 * conversion is enabled and a filter exists. Label attributes are left out
 * when the instance has more attributes than the model has inputs.
 *
 * @param inputInstance the instance to convert
 * @return the attribute values forming the model input
 * @throws InvalidDataException if the instance cannot be filtered or has
 *             fewer attributes than the model expects
 */
private double[] getFeatureVector(Instance inputInstance) {

    final boolean mustFilter = convertNomToBin && nomToBinFilter != null;
    if (mustFilter) {
        try {
            nomToBinFilter.input(inputInstance);
            inputInstance = nomToBinFilter.output();
            inputInstance.setDataset(null);
        } catch (Exception ex) {
            throw new InvalidDataException("The input instance for prediction is invalid. "
                    + "Instance is not consistent with the data the model was built for.");
        }
    }

    // The model input size is taken as the first perceptron's weight count minus one
    final int attributeCount = inputInstance.numAttributes();
    final int modelInputDim = perceptrons.get(0).getWeights().length - 1;
    if (attributeCount < modelInputDim) {
        throw new InvalidDataException("Input instance do not have enough attributes "
                + "to be processed by the model. Instance is not consistent with the data the model was built for.");
    }

    // Surplus attributes are assumed to be the true outputs and are skipped below
    final boolean hasLabelColumns = attributeCount > modelInputDim;
    final List<Integer> labelColumns = new ArrayList<Integer>();
    if (hasLabelColumns) {
        for (int labelColumn : this.labelIndices) {
            labelColumns.add(labelColumn);
        }
    }

    final double[] inputPattern = new double[modelInputDim];
    int next = 0;
    for (int attr = 0; attr < attributeCount; attr++) {
        final boolean isLabel = hasLabelColumns && labelColumns.contains(attr);
        if (!isLabel) {
            inputPattern[next] = inputInstance.value(attr);
            next++;
        }
    }

    return inputPattern;
}

From source file:mulan.classifier.neural.NormalizationFilter.java

License:Open Source License

/**
 * Performs a normalization of numerical attributes on given instance.
 * The instance must conform to format of instances data the {@link NormalizationFilter}
 * was initialized with./*from   w  ww  .ja va2 s . c om*/
 * @param instance the instance to be normalized
 */
public void normalize(Instance instance) {
    Set<Integer> normScope = attStats.keySet();
    for (Integer attIndex : normScope) {
        double[] stats = attStats.get(attIndex);
        double attMin = stats[0];
        double attMax = stats[1];
        double value = instance.value(attIndex);
        if (attMin == attMax) {
            instance.setValue(attIndex, minValue);
        } else {
            instance.setValue(attIndex,
                    (((value - stats[0]) / (stats[1] - stats[0])) * (maxValue - minValue)) + minValue);
        }
    }
}