Example usage for weka.core Instance attribute

List of usage examples for weka.core Instance attribute

Introduction

On this page you can find usage examples for the weka.core.Instance method attribute(int).

Prototype

public Attribute attribute(int index);

Source Link

Document

Returns the attribute with the given index.

Usage

From source file:mulan.classifier.meta.thresholding.OneThreshold.java

License:Open Source License

/**
 * Evaluates the performance of the learner on a data set according to a
 * bipartition measure for a range of thresholds, and returns the threshold
 * whose measure value is closest to the measure's ideal value.
 *
 * @param learner the learner whose confidences are thresholded
 * @param data the test data to evaluate different thresholds
 * @param measure the evaluation is based on this parameter
 * @param min the minimum threshold
 * @param step the step to increase threshold from min to max
 * @param max the maximum threshold
 * @return the optimal threshold
 * @throws Exception if copying the measure or making a prediction fails
 */
private double computeThreshold(MultiLabelLearner learner, MultiLabelInstances data,
        BipartitionMeasureBase measure, double min, double step, double max) throws Exception {
    int numOfThresholds = (int) Math.rint((max - min) / step + 1);
    double[] performance = new double[numOfThresholds];

    // One independent copy of the measure per candidate threshold.
    BipartitionMeasureBase[] measureForThreshold = new BipartitionMeasureBase[numOfThresholds];
    for (int i = 0; i < numOfThresholds; i++) {
        measureForThreshold[i] = (BipartitionMeasureBase) measure.makeCopy();
        measureForThreshold[i].reset();
    }

    // Java zero-initialises arrays, so every entry starts out as false.
    boolean[] thresholdHasProblem = new boolean[numOfThresholds];

    for (int j = 0; j < data.getNumInstances(); j++) {
        Instance instance = data.getDataSet().instance(j);

        if (data.hasMissingLabels(instance)) {
            continue;
        }

        MultiLabelOutput mlo = learner.makePrediction(instance);

        boolean[] trueLabels = new boolean[numLabels];
        for (int counter = 0; counter < numLabels; counter++) {
            int classIdx = labelIndices[counter];
            String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
            trueLabels[counter] = classValue.equals("1");
        }

        double[] confidences = mlo.getConfidences();

        // Iterate by index and derive the threshold from it. Accumulating
        // "currentThreshold += step" drifts in floating point, so a
        // "currentThreshold <= max" loop can run one iteration short of (or
        // past) numOfThresholds and disagree with the value returned below.
        for (int t = 0; t < numOfThresholds; t++) {
            double currentThreshold = min + t * step;
            boolean[] bipartition = new boolean[numLabels];
            for (int k = 0; k < numLabels; k++) {
                bipartition[k] = confidences[k] >= currentThreshold;
            }
            try {
                MultiLabelOutput temp = new MultiLabelOutput(bipartition);
                measureForThreshold[t].update(temp, trueLabels);
            } catch (MulanRuntimeException e) {
                // The measure could not be updated for this threshold;
                // exclude the threshold from the final selection.
                thresholdHasProblem[t] = true;
            }
        }
    }

    for (int i = 0; i < numOfThresholds; i++) {
        if (!thresholdHasProblem[i]) {
            performance[i] = Math.abs(measure.getIdealValue() - measureForThreshold[i].getValue());
        } else {
            // Ensures a problematic threshold is never picked as the minimum.
            performance[i] = Double.MAX_VALUE;
        }
    }

    return min + Utils.minIndex(performance) * step;
}

From source file:mulan.classifier.meta.thresholding.RCut.java

License:Open Source License

/**
 * Evaluates the performance of different threshold values.
 *
 * @param learner the learner used to obtain a ranking for each instance
 * @param data the test data to evaluate different thresholds
 * @param measure the evaluation is based on this parameter
 * @return the sum of differences from the optimal value of the measure for
 * each instance and threshold; the accumulation step is currently disabled,
 * so the returned array keeps its initial zero values
 * @throws Exception if making a prediction fails
 */
private double[] computeThreshold(MultiLabelLearner learner, MultiLabelInstances data,
        BipartitionMeasureBase measure) throws Exception {
    double[] diff = new double[numLabels + 1];
    measure.reset();
    for (int row = 0; row < data.getNumInstances(); row++) {
        Instance current = data.getDataSet().instance(row);

        // Only fully labelled instances take part in the evaluation.
        if (!data.hasMissingLabels(current)) {
            MultiLabelOutput prediction = learner.makePrediction(current);

            // Ground truth: a label is relevant when its nominal value is "1".
            boolean[] trueLabels = new boolean[numLabels];
            for (int label = 0; label < numLabels; label++) {
                int attrIndex = labelIndices[label];
                trueLabels[label] = current.attribute(attrIndex)
                        .value((int) current.value(attrIndex)).equals("1");
            }

            int[] ranking = prediction.getRanking();
            int threshold = 0;
            while (threshold <= numLabels) {
                // Labels ranked at or above the cut-off form the bipartition.
                boolean[] bipartition = new boolean[numLabels];
                for (int label = 0; label < numLabels; label++) {
                    bipartition[label] = ranking[label] <= threshold;
                }
                // this doesn't work with label-based measures
                //                diff[threshold] += Math.abs(measure.getIdealValue() - measure.updateBipartition(bipartition, trueLabels));
                threshold++;
            }
        }
    }
    return diff;
}

From source file:mulan.classifier.neural.DataPair.java

License:Open Source License

/**
 * Creates a {@link DataPair} representation for each {@link Instance} contained in
 * {@link MultiLabelInstances} data set. The {@link DataPair} is a light weight representation
 * of instance values (by double values), which is useful when iterating over the data and its
 * values. Instances with missing labels are skipped and produce no pair.
 *
 * @param mlDataSet the {@link MultiLabelInstances} which content has to be
 *          converted to list of {@link DataPair}
 * @param bipolarOutput indicates whether output values should be converted
 *          to bipolar values, or left intact as binary
 * @return the list of data pairs
 */
// TODO: this method should be in some kind of "data utils".
public static List<DataPair> createDataPairs(MultiLabelInstances mlDataSet, boolean bipolarOutput) {

    Instances data = mlDataSet.getDataSet();
    int[] featureIndices = mlDataSet.getFeatureIndices();
    int[] labelIndices = mlDataSet.getLabelIndices();
    int numFeatures = featureIndices.length;
    int numLabels = mlDataSet.getNumLabels();

    int numInstances = data.numInstances();
    List<DataPair> dataPairs = new ArrayList<DataPair>(numInstances);
    for (int index = 0; index < numInstances; index++) {
        Instance instance = data.instance(index);

        // Decide whether to skip before doing any conversion work: an
        // instance with missing labels never yields a pair, so its feature
        // values must not be parsed (and cannot fail parsing) needlessly.
        if (mlDataSet.hasMissingLabels(instance)) {
            continue;
        }

        double[] input = new double[numFeatures];
        for (int i = 0; i < numFeatures; i++) {
            int featureIndex = featureIndices[i];
            Attribute featureAttr = instance.attribute(featureIndex);
            // if attribute is binary, parse the string value ... it is expected to be '0' or '1'
            if (featureAttr.isNominal() && featureAttr.numValues() == 2) {
                input[i] = Double.parseDouble(instance.stringValue(featureIndex));
            } // else :
              // a) the attribute is nominal with multiple values, use indexes as nominal values
              //    do not have to be numbers in general ... this is fall-back ... should be rare case
              // b) is numeric attribute
            else {
                input[i] = instance.value(featureIndex);
            }
        }

        double[] output = new double[numLabels];
        for (int i = 0; i < numLabels; i++) {
            // The label's nominal value string is parsed as a number.
            output[i] = Double
                    .parseDouble(data.attribute(labelIndices[i]).value((int) instance.value(labelIndices[i])));
            // Bipolar encoding maps the "off" value 0 to -1.
            if (bipolarOutput && output[i] == 0) {
                output[i] = -1;
            }
        }

        dataPairs.add(new DataPair(input, output));
    }

    return dataPairs;
}

From source file:mulan.data.IterativeStratification.java

License:Open Source License

/**
 * Builds the relevance vector of an instance: entry {@code n} is true when
 * the nominal value of the label attribute at {@code labelIndices[n]} is "1".
 *
 * @param instance the instance whose labels are read
 * @param numLabels the number of labels, i.e. the length of the result
 * @param labelIndices the attribute indices of the labels
 * @return the relevance flag of each label, in the order of labelIndices
 */
private boolean[] getTrueLabels(Instance instance, int numLabels, int[] labelIndices) {
    boolean[] relevant = new boolean[numLabels];
    int position = 0;
    while (position < numLabels) {
        int attrIndex = labelIndices[position];
        String labelValue = instance.attribute(attrIndex).value((int) instance.value(attrIndex));
        relevant[position] = labelValue.equals("1");
        position++;
    }
    return relevant;
}

From source file:mulan.transformations.multiclass.Copy.java

License:Open Source License

/**
 * Transforms a multi-label instance to a list of single-label instances,
 * one for each of the labels that annotate the instance, by copying the
 * feature vector/*  ww  w . j  a  v  a  2 s . c om*/
 *
 * @param instance a multi-label instance
 * @return a list with the transformed single-label instances
 */
List<Instance> transformInstance(Instance instance) {
    List<Instance> result = new ArrayList<Instance>();
    for (int counter = 0; counter < numOfLabels; counter++) {
        if (instance.attribute(labelIndices[counter]).value((int) instance.value(labelIndices[counter]))
                .equals("1")) {
            Instance transformed = null;
            try {
                transformed = RemoveAllLabels.transformInstance(instance, labelIndices);
                transformed.setDataset(null);
                transformed.insertAttributeAt(transformed.numAttributes());
                transformed.setValue(transformed.numAttributes() - 1, counter);
            } catch (Exception ex) {
                Logger.getLogger(Copy.class.getName()).log(Level.SEVERE, null, ex);
            }
            result.add(transformed);
        }
    }
    return result;
}

From source file:mulan.transformations.multiclass.Ignore.java

License:Open Source License

/**
 * Transforms a multi-label example with a single annotation to a
 * single-label example and ignores multi-label example with more
 * annotations/*ww  w.j  a v a 2s.  c  o m*/
 *
 * @param instance a multi-label example
 * @return a list that is either empty or contains the transformed
 * single-label example
 */
List<Instance> transformInstance(Instance instance) {
    List<Instance> result = new ArrayList<Instance>();
    int indexOfSingleLabel = -1;
    int counter = 0;
    for (int labelCounter = 0; labelCounter < numOfLabels; labelCounter++) {
        int index = labelIndices[labelCounter];
        if (instance.attribute(index).value((int) instance.value(index)).equals("1")) {
            counter++;
            indexOfSingleLabel = labelCounter;
        }
        if (counter > 1) {
            break;
        }
    }
    if (counter > 1 || counter == 0) {
        return result;
    }

    Instance transformedInstance;
    try {
        transformedInstance = RemoveAllLabels.transformInstance(instance, labelIndices);
        transformedInstance.setDataset(null);
        transformedInstance.insertAttributeAt(transformedInstance.numAttributes());
        transformedInstance.setValue(transformedInstance.numAttributes() - 1, indexOfSingleLabel);
        result.add(transformedInstance);
    } catch (Exception ex) {
        Logger.getLogger(Ignore.class.getName()).log(Level.SEVERE, null, ex);
    }
    return result;
}

From source file:mulan.transformations.multiclass.SelectBasedOnFrequency.java

License:Open Source License

/**
 * Transforms a multi-label example to a list containing a single-label
 * multi-class example by selecting, among the labels that annotate the
 * example, the most/least frequent label in the training set.
 *
 * @param instance the multi-label example
 * @return a list with the transformed single-label example, or an empty
 * list if the transformation failed (the failure is logged)
 */
List<Instance> transformInstance(Instance instance) {
    // -1 means "no annotating label seen yet". Starting from label 0
    // unconditionally would let label 0 win even when the instance is not
    // annotated with it.
    int labelSelected = -1;
    int value = 0;
    for (int counter = 0; counter < numOfLabels; counter++) {
        int labelIndex = labelIndices[counter];
        if (!instance.attribute(labelIndex).value((int) instance.value(labelIndex)).equals("1")) {
            continue;
        }

        // The first annotating label is always taken as the initial candidate;
        // later ones must be strictly better, so ties keep the earliest label.
        boolean test = labelSelected < 0;
        if (!test) {
            switch (type) {
            case MIN:
                test = labelOccurance[counter] < value;
                break;
            case MAX:
                test = labelOccurance[counter] > value;
                break;
            }
        }

        if (test) {
            value = labelOccurance[counter];
            labelSelected = counter;
        }
    }

    // NOTE(review): an instance without any annotating label keeps the
    // previous behaviour of being assigned label 0 - confirm this is wanted.
    if (labelSelected < 0) {
        labelSelected = 0;
    }

    List<Instance> result = new ArrayList<Instance>();
    try {
        Instance transformed = RemoveAllLabels.transformInstance(instance, labelIndices);
        transformed.setDataset(null);
        // Append the single class attribute and store the chosen label in it.
        transformed.insertAttributeAt(transformed.numAttributes());
        transformed.setValue(transformed.numAttributes() - 1, labelSelected);
        // Add only on success so the list never holds null.
        result.add(transformed);
    } catch (Exception ex) {
        Logger.getLogger(SelectBasedOnFrequency.class.getName()).log(Level.SEVERE, null, ex);
    }
    return result;
}

From source file:mulan.transformations.multiclass.SelectRandom.java

License:Open Source License

/**
 * Transforms a multi-label example to a list containing a single-label
 * multi-class example by randomly selecting one of the labels
 * /*from   w  w  w .j  a va2 s  .c  om*/
 * @param instance the multi-label example
 * @return the list with the single-label multi-class example
 */
List<Instance> transformInstance(Instance instance) {
    ArrayList<Integer> labels = new ArrayList<Integer>();
    for (int counter = 0; counter < numOfLabels; counter++) {
        if (instance.attribute(labelIndices[counter]).value((int) instance.value(labelIndices[counter]))
                .equals("1")) {
            labels.add(counter);
        }
    }

    int randomLabel = labels.get((int) (Math.random() * labels.size()));

    Instance transformed = null;
    try {
        transformed = RemoveAllLabels.transformInstance(instance, labelIndices);
        transformed.setDataset(null);
        transformed.insertAttributeAt(transformed.numAttributes());
        transformed.setValue(transformed.numAttributes() - 1, randomLabel);
    } catch (Exception ex) {
        Logger.getLogger(Copy.class.getName()).log(Level.SEVERE, null, ex);
    }

    List<Instance> result = new ArrayList<Instance>();
    result.add(transformed);
    return result;
}

From source file:myclassifier.myC45Pack.SplitModel.java

/**
 * Determines which subset of this split the instance falls into.
 *
 * @param instance the instance to route
 * @return -1 if the split attribute is missing; the nominal value index for
 * a nominal split attribute; otherwise 0 when the value is at most the
 * split point and 1 when it is greater
 * @throws Exception declared by the overridden method
 */
@Override
public int getSubsetIndex(Instance instance) throws Exception {
    // A missing split value cannot be routed to any subset.
    if (instance.isMissing(attribIndex)) {
        return -1;
    }
    // Nominal split: one subset per attribute value.
    if (instance.attribute(attribIndex).isNominal()) {
        return (int) instance.value(attribIndex);
    }
    // Otherwise a binary split around the split point.
    return instance.value(attribIndex) <= splitPointValue ? 0 : 1;
}

From source file:myid3andc45classifier.Model.MyC45.java

/**
 * Classifies the instance by descending the tree from this node.
 *
 * A leaf (no split attribute) returns its label. Otherwise the first
 * space-separated token of the split attribute's name is compared with the
 * names of the instance's numeric attributes; on a match the second token
 * is parsed as the threshold and the instance follows the "<=" or ">"
 * branch. Without a match the split is treated as nominal.
 *
 * @param instance the instance to classify
 * @return the class value predicted by the leaf that is reached
 */
@Override
public double classifyInstance(Instance instance) {
    if (attribute == null) {
        return label;
    }

    String[] nameParts = attribute.name().split(" ");

    // Locate the numeric attribute this node splits on. The matching
    // attribute index itself is kept: counting only the numeric attributes
    // seen so far gives a wrong index whenever a non-numeric attribute
    // precedes the match.
    int numericIndex = -1;
    for (int j = 0; j < instance.numAttributes(); j++) {
        if (instance.attribute(j).isNumeric()
                && instance.attribute(j).name().equalsIgnoreCase(nameParts[0])) {
            numericIndex = j;
            break;
        }
    }

    if (numericIndex >= 0) {
        double threshold = Double.parseDouble(nameParts[1]);
        // Branch values are named "<=threshold" and ">threshold".
        String branch = instance.value(numericIndex) <= threshold ? "<=" + threshold : ">" + threshold;
        return successors[(int) attribute.indexOfValue(branch)].classifyInstance(instance);
    }

    // Nominal split: the value index selects the successor.
    return successors[(int) instance.value(attribute)].classifyInstance(instance);
}