Example usage for weka.core Instance value

Introduction

This page collects example usages of the value method of weka.core.Instance.

Prototype

public double value(Attribute att);

Source Link

Document

Returns an instance's attribute value in internal format.

Usage

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Splits instances into subsets based on the given split.
 * <p>
 * The kind of split is selected by {@code m_Attribute}:
 * <ul>
 * <li>an index below {@code data.numAttributes()} is a regular attribute
 * split: nominal attributes are split by value index, numeric attributes by
 * comparison against {@code m_SplitPoint}, and instances with a missing value
 * are copied into every subset with a positive proportion in {@code m_Prop},
 * weighted by that proportion;</li>
 * <li>an index of at least
 * {@code listOfFc.size() + data.numAttributes() - 1} is a custom-set split:
 * each instance goes to the subset whose index is returned by
 * {@code checkPointInPolygon};</li>
 * <li>any other index is a classifier split: each instance goes to the subset
 * whose index is the class predicted by the classifier looked up in
 * {@code listOfFc}.</li>
 * </ul>
 *
 * @param data
 *            the data to work with
 * @return the subsets of instances
 * @throws IllegalArgumentException
 *             if a classifier split yields a missing prediction, or the split
 *             attribute is neither nominal nor numeric
 * @throws Exception
 *             if something goes wrong
 */
protected Instances[] splitData(Instances data) throws Exception {

    // Allocate one (initially empty) subset per branch
    Instances[] subsets = new Instances[m_Prop.length];
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i] = new Instances(data, data.numInstances());
    }

    if (m_Attribute >= data.numAttributes()) {
        if (m_Attribute >= listOfFc.size() + data.numAttributes() - 1) {
            // Custom-set split: route each instance by point-in-polygon test
            CustomSet cSet = getReqCustomSet(m_Attribute - (data.numAttributes() - 1 + listOfFc.size()),
                    cSetList);
            JsonNode vertices = mapper.readTree(cSet.getConstraints());
            ArrayList<double[]> attrVertices = generateVerticesList(vertices);
            List<Attribute> aList = generateAttributeList(cSet, data, d);
            // NOTE(review): testPoint holds two coordinates, so aList is
            // presumably expected to contain at most two attributes - confirm.
            double[] testPoint = new double[2];
            for (int k = 0; k < data.numInstances(); k++) {
                int ctr = 0;
                for (Attribute a : aList) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
                int check = checkPointInPolygon(attrVertices, testPoint);
                subsets[check].add(data.instance(k));
            }
        } else {
            // Classifier split: route each instance by its predicted class
            for (int i = 0; i < data.numInstances(); i++) {

                // Get instance
                Instance inst = data.instance(i);
                String classifierId = getKeyinMap(listOfFc, m_Attribute, data);
                Classifier fc = listOfFc.get(classifierId);
                double predictedClass = fc.classifyInstance(inst);

                // A missing prediction is encoded as NaN. NaN never compares
                // equal to anything, so it has to be detected with isNaN;
                // casting it to int would silently select subset 0.
                if (Double.isNaN(predictedClass)) {
                    throw new IllegalArgumentException("Classifier '" + classifierId
                            + "' returned a missing prediction for instance " + i);
                }
                subsets[(int) predictedClass].add(inst);
            }
        }
    } else {
        // Go through the data
        for (int i = 0; i < data.numInstances(); i++) {

            // Get instance
            Instance inst = data.instance(i);

            // Does the instance have a missing value?
            if (inst.isMissing(m_Attribute)) {

                // Split instance up: one weighted copy per non-empty branch
                for (int k = 0; k < m_Prop.length; k++) {
                    if (m_Prop[k] > 0) {
                        Instance copy = (Instance) inst.copy();
                        copy.setWeight(m_Prop[k] * inst.weight());
                        subsets[k].add(copy);
                    }
                }

                // Proceed to next instance
                continue;
            }

            // Do we have a nominal attribute?
            if (data.attribute(m_Attribute).isNominal()) {
                subsets[(int) inst.value(m_Attribute)].add(inst);

                // Proceed to next instance
                continue;
            }

            // Do we have a numeric attribute?
            if (data.attribute(m_Attribute).isNumeric()) {
                subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst);

                // Proceed to next instance
                continue;
            }

            // Else throw an exception
            throw new IllegalArgumentException("Unknown attribute type");
        }
    }

    // Save memory
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i].compactify();
    }

    // Return the subsets
    return subsets;
}

From source file:org.sr.recognition.paleo.paleoNN.PaleoTrainer.java

License:BSD License

/**
 * Converts ARFF instances into a format readable by LibSVM and outputs it
 * to file/*  w ww. j av  a  2  s  .  c o  m*/
 * 
 * @param filename
 *            output file name
 * @param data
 *            ARFF instances (the data)
 * @throws IOException
 *             if output fails
 */
public static void libSVMToFile(String filename, Instances data) throws IOException {
    BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        double label = inst.value(inst.numValues() - 1);
        writer.write(label + "\t");
        for (int a = 0; a < inst.numValues() - 1; a++) {
            double value = inst.value(a);
            if (Double.isInfinite(value) || Double.isNaN(value))
                value = mean(data.attributeToDoubleArray(a));
            writer.write((a + 1) + ":" + value);
            if (a < inst.numValues() - 2)
                writer.write(" ");
        }
        writer.newLine();
    }
}

From source file:org.stream_gpu.float_knn.float_search.EuclideanDistance.java

License:Open Source License

/**
 * Checks whether the instance's value in the given dimension is less than or
 * equal to a reference value.
 *
 * @param instance    the instance the value is read from
 * @param dim    the dimension (attribute index) of the value
 * @param value    the reference value to compare against
 * @return       true if the instance's value is smaller than or equal to
 *               {@code value}
 */
public boolean valueIsSmallerEqual(Instance instance, int dim, float value) {
    final double observed = instance.value(dim);
    return observed <= value;
}

From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java

License:Open Source License

/**
 * Initializes the ranges from the first instance.
 * <p>
 * For every attribute that is present, low and high are both set to the
 * instance's value and width to zero. For a missing attribute, low is set to
 * positive infinity, high to negative infinity and width to positive
 * infinity.
 *
 * @param instance    the first instance
 * @param numAtt    number of attributes in the model
 * @param ranges    low, high and width values for all attributes
 */
public void updateRangesFirst(Instance instance, int numAtt, float[][] ranges) {
    for (int att = 0; att < numAtt; att++) {
        if (instance.isMissing(att)) {
            // No value yet: use sentinels that any real value will replace
            final float[] empty = ranges[att];
            empty[R_MIN] = Float.POSITIVE_INFINITY;
            empty[R_MAX] = Float.NEGATIVE_INFINITY;
            empty[R_WIDTH] = Float.POSITIVE_INFINITY;
            continue;
        }

        final float[] row = ranges[att];
        final float first = (float) instance.value(att);
        row[R_MIN] = first;
        row[R_MAX] = first;
        row[R_WIDTH] = 0.0F;
    }
}

From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java

License:Open Source License

/**
 * Updates the minimum and maximum and width values for all the attributes
 * based on a new instance./*from   w  w  w.  j av a  2s  .  c om*/
 * 
 * @param instance    the new instance
 * @param numAtt    number of attributes in the model
 * @param ranges    low, high and width values for all attributes
 */
public void updateRanges(Instance instance, int numAtt, float[][] ranges) {
    // updateRangesFirst must have been called on ranges
    for (int j = 0; j < numAtt; j++) {
        float value = (float) instance.value(j);
        if (!instance.isMissing(j)) {
            if (value < ranges[j][R_MIN]) {
                ranges[j][R_MIN] = value;
                ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];
                if (value > ranges[j][R_MAX]) { //if this is the first value that is
                    ranges[j][R_MAX] = value; //not missing. The,0
                    ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];
                }
            } else {
                if (value > ranges[j][R_MAX]) {
                    ranges[j][R_MAX] = value;
                    ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];
                }
            }
        }
    }
}

From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java

License:Open Source License

/**
 * Updates the ranges given a new instance. Attributes that are missing in the
 * instance leave their range untouched.
 *
 * @param instance    the new instance
 * @param ranges    low, high and width values for all attributes
 * @return      the updated ranges (the same array that was passed in)
 */
public float[][] updateRanges(Instance instance, float[][] ranges) {
    // updateRangesFirst must have been called on ranges
    for (int j = 0; j < ranges.length; j++) {
        if (instance.isMissing(j)) {
            continue;
        }

        float value = (float) instance.value(j);
        boolean changed = false;

        // The two bounds are tested independently. If the range was still
        // unset (low above high), the first real value lies below the low and
        // above the high at once; updating only one of them would leave the
        // other bound and the width infinite.
        if (value < ranges[j][R_MIN]) {
            ranges[j][R_MIN] = value;
            changed = true;
        }
        if (value > ranges[j][R_MAX]) {
            ranges[j][R_MAX] = value;
            changed = true;
        }
        if (changed) {
            ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];
        }
    }

    return ranges;
}

From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java

License:Open Source License

/**
 * Test if an instance is within the given ranges.
 * /*from  w w w . j  a va  2s .  c  om*/
 * @param instance    the instance
 * @param ranges    the ranges the instance is tested to be in
 * @return true    if instance is within the ranges
 */
public boolean inRanges(Instance instance, float[][] ranges) {
    boolean isIn = true;

    // updateRangesFirst must have been called on ranges
    for (int j = 0; isIn && (j < ranges.length); j++) {
        if (!instance.isMissing(j)) {
            float value = (float) instance.value(j);
            isIn = value <= ranges[j][R_MAX];
            if (isIn)
                isIn = value >= ranges[j][R_MIN];
        }
    }

    return isIn;
}

From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.OutlierHandler.java

License:Apache License

/**
 * Method to replace the detected outlier values.
 * <p>
 * Loads {@code BASE_DIR + "OriginalDataSet.csv"}, runs this filter over it,
 * and replaces every numeric, non-class value flagged by {@code isOutlier} or
 * {@code isExtremeValue} with the mean of the other values in the same
 * column, rounded to two decimals. The last two attributes of the filtered
 * data are then removed and the result is passed to
 * {@code saveConsistentData}.
 *
 * @throws Exception the exception
 */
public void replaceOutliers() throws Exception {

    String inputFile = BASE_DIR + "OriginalDataSet.csv";

    // load CSV file
    CSVLoader fileLoader = new CSVLoader();
    fileLoader.setSource(new File(inputFile));
    Instances inputData = fileLoader.getDataSet();
    this.setInputFormat(inputData);
    Instances outputData = Filter.useFilter(inputData, this);

    int numInstances = outputData.numInstances();
    int numAttributes = outputData.numAttributes();

    final int NON_NUMERIC = -1;

    // Select every attribute, then mark the ones that must be skipped
    Range allAttributes = new Range("first-last");
    allAttributes.setUpper(outputData.numAttributes() - 1);
    int[] attributeIndices = allAttributes.getSelection();

    for (int i = 0; i < attributeIndices.length; i++) {
        // ignore class
        if (attributeIndices[i] == outputData.classIndex()) {
            attributeIndices[i] = NON_NUMERIC;
            continue;
        }
        // not numeric -> ignore it
        if (!outputData.attribute(attributeIndices[i]).isNumeric()) {
            attributeIndices[i] = NON_NUMERIC;
        }
    }

    for (int instanceIndex = 0; instanceIndex < numInstances; instanceIndex++) {
        // access instance
        Instance tempInstance = outputData.instance(instanceIndex);

        for (int attributeIndex = 0; attributeIndex < numAttributes; attributeIndex++) {
            // non-numeric attribute?
            if (attributeIndices[attributeIndex] == NON_NUMERIC) {
                continue;
            }

            // detect the outlier values using Interquartile approach and
            // replace them with the mean of the other values in the column
            if (this.isOutlier(tempInstance, attributeIndices[attributeIndex])) {
                replaceWithMeanOfOthers(outputData, instanceIndex, attributeIndex);
            }

            // extreme value? (checked after any outlier replacement above)
            if (this.isExtremeValue(tempInstance, attributeIndices[attributeIndex])) {
                replaceWithMeanOfOthers(outputData, instanceIndex, attributeIndex);
            }
        }
    }

    // NOTE(review): drops the last two attributes - presumably the flag
    // attributes appended by the filter; confirm against the filter's output.
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);

    saveConsistentData(inputFile, outputData);

}

/**
 * Replaces one value with the mean of all other values in the same column,
 * rounded to two decimals. Does nothing when the column has no other values.
 */
private static void replaceWithMeanOfOthers(Instances data, int instanceIndex, int columnIndex) {
    double[] columnValues = data.attributeToDoubleArray(columnIndex);
    if (columnValues.length < 2) {
        // No other values to average; dividing by zero would yield NaN
        return;
    }

    double sum = 0.0;
    for (int i = 0; i < columnValues.length; i++) {
        sum = sum + columnValues[i];
    }

    // Exclude the value being replaced from its own replacement mean
    sum = sum - data.instance(instanceIndex).value(columnIndex);
    double replacedValue = sum / (columnValues.length - 1);
    replacedValue = Math.round(replacedValue * 100D) / 100D;

    data.instance(instanceIndex).setValue(columnIndex, replacedValue);
}

From source file:org.wikipedia.miner.annotation.Disambiguator.java

License:Open Source License

/**
 * Re-weights the training instances by the value of attribute 3.
 * <p>
 * Instances whose attribute 3 has internal value 0 are counted as positive,
 * all others as negative. With {@code p} the positive fraction, positive
 * instances receive weight {@code 0.5 / p} and negative instances
 * {@code 0.5 / (1 - p)}.
 */
@SuppressWarnings("unchecked")
private void weightTrainingInstances() {

    double positiveCount = 0;
    double negativeCount = 0;

    // First pass: tally both groups
    for (Enumeration<Instance> it = trainingData.enumerateInstances(); it.hasMoreElements();) {
        Instance instance = it.nextElement();
        if (instance.value(3) == 0) {
            positiveCount++;
        } else {
            negativeCount++;
        }
    }

    final double p = positiveCount / (positiveCount + negativeCount);
    final double positiveWeight = 0.5 * (1.0 / p);
    final double negativeWeight = 0.5 * (1.0 / (1 - p));

    // Second pass: assign the weight that matches each instance's group
    for (Enumeration<Instance> it = trainingData.enumerateInstances(); it.hasMoreElements();) {
        Instance instance = it.nextElement();
        instance.setWeight(instance.value(3) == 0 ? positiveWeight : negativeWeight);
    }

}

From source file:org.wikipedia.miner.annotation.weighting.LinkDetector.java

License:Open Source License

/**
 * Re-weights the training instances by the value of the attribute at index
 * {@code attributes.size() - 1}.
 * <p>
 * Instances whose value there is 0 are counted as positive, all others as
 * negative. With {@code p} the positive fraction, positive instances receive
 * weight {@code 0.5 / p} and negative instances {@code 0.5 / (1 - p)}. The
 * two fractions are printed to standard output.
 */
@SuppressWarnings("unchecked")
private void weightTrainingInstances() {

    double positiveCount = 0;
    double negativeCount = 0;

    // First pass: tally both groups
    for (Enumeration<Instance> it = trainingData.enumerateInstances(); it.hasMoreElements();) {
        Instance instance = it.nextElement();
        if (instance.value(attributes.size() - 1) == 0) {
            positiveCount++;
        } else {
            negativeCount++;
        }
    }

    final double p = positiveCount / (positiveCount + negativeCount);

    System.out.println("stats: positive=" + p + ", negative=" + (1 - p));

    final double positiveWeight = 0.5 * (1.0 / p);
    final double negativeWeight = 0.5 * (1.0 / (1 - p));

    // Second pass: assign the weight that matches each instance's group
    for (Enumeration<Instance> it = trainingData.enumerateInstances(); it.hasMoreElements();) {
        Instance instance = it.nextElement();
        boolean positive = instance.value(attributes.size() - 1) == 0;
        instance.setWeight(positive ? positiveWeight : negativeWeight);
    }
}