Example usage of weka.core.Instance.value

List of usage examples for weka.core Instance value

Introduction

On this page you can find usage examples for the weka.core.Instance method value.

Prototype

public double value(Attribute att);

Source Link

Document

Returns an instance's attribute value in internal format.

Usage

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Splits instances into subsets based on the given split.
 * <p>
 * The split is selected by {@code m_Attribute}:
 * <ul>
 * <li>an index below {@code data.numAttributes()} is an ordinary attribute
 * split (nominal value index, or numeric comparison against
 * {@code m_SplitPoint}); instances with a missing value are copied into
 * every branch with a weight scaled by {@code m_Prop};</li>
 * <li>an index at or beyond
 * {@code listOfFc.size() + data.numAttributes() - 1} selects a custom set,
 * and the value returned by {@code checkPointInPolygon} is used as the
 * subset index;</li>
 * <li>any other index selects a classifier from {@code listOfFc}, and the
 * predicted class is used as the subset index.</li>
 * </ul>
 *
 * @param data
 *            the data to work with
 * @return the subsets of instances
 * @throws IllegalArgumentException
 *             if the split attribute is neither nominal nor numeric, or if
 *             the selected classifier returns a missing prediction
 * @throws Exception
 *             if something goes wrong
 */
protected Instances[] splitData(Instances data) throws Exception {

    // Allocate array of Instances objects, one per branch
    Instances[] subsets = new Instances[m_Prop.length];
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i] = new Instances(data, data.numInstances());
    }

    if (m_Attribute >= data.numAttributes()) {
        if (m_Attribute >= listOfFc.size() + data.numAttributes() - 1) {
            // Custom-set split: route each instance by the polygon test result
            CustomSet cSet = getReqCustomSet(m_Attribute - (data.numAttributes() - 1 + listOfFc.size()),
                    cSetList);
            JsonNode vertices = mapper.readTree(cSet.getConstraints());
            ArrayList<double[]> attrVertices = generateVerticesList(vertices);
            List<Attribute> aList = generateAttributeList(cSet, data, d);
            // NOTE(review): the buffer holds two coordinates, so aList is
            // assumed to contain at most two attributes - confirm.
            double[] testPoint = new double[2];
            for (int k = 0; k < data.numInstances(); k++) {
                Instance inst = data.instance(k);
                int ctr = 0;
                for (Attribute a : aList) {
                    testPoint[ctr] = inst.value(a);
                    ctr++;
                }
                int check = checkPointInPolygon(attrVertices, testPoint);
                subsets[check].add(inst);
            }
        } else {
            // Classifier split: the lookup only depends on values that do
            // not change inside the loop, so resolve the classifier once.
            String classifierId = getKeyinMap(listOfFc, m_Attribute, data);
            Classifier fc = listOfFc.get(classifierId);

            // Go through the data
            for (int i = 0; i < data.numInstances(); i++) {

                // Get instance
                Instance inst = data.instance(i);
                double predictedClass = fc.classifyInstance(inst);

                // A missing value is encoded as NaN, and NaN never compares
                // equal (or unequal in a useful way) to anything, so it must
                // be detected with isNaN rather than with != / ==.
                if (Double.isNaN(predictedClass)) {
                    throw new IllegalArgumentException("Classifier returned a missing prediction");
                }
                subsets[(int) predictedClass].add(inst);
            }
        }
    } else {
        // Go through the data
        for (int i = 0; i < data.numInstances(); i++) {

            // Get instance
            Instance inst = data.instance(i);

            // Does the instance have a missing value?
            if (inst.isMissing(m_Attribute)) {

                // Split instance up: one weighted copy per non-empty branch
                for (int k = 0; k < m_Prop.length; k++) {
                    if (m_Prop[k] > 0) {
                        Instance copy = (Instance) inst.copy();
                        copy.setWeight(m_Prop[k] * inst.weight());
                        subsets[k].add(copy);
                    }
                }

                // Proceed to next instance
                continue;
            }

            // Do we have a nominal attribute?
            if (data.attribute(m_Attribute).isNominal()) {
                subsets[(int) inst.value(m_Attribute)].add(inst);

                // Proceed to next instance
                continue;
            }

            // Do we have a numeric attribute?
            if (data.attribute(m_Attribute).isNumeric()) {
                subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst);

                // Proceed to next instance
                continue;
            }

            // Else throw an exception
            throw new IllegalArgumentException("Unknown attribute type");
        }
    }

    // Save memory
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i].compactify();
    }

    // Return the subsets
    return subsets;
}

From source file:org.sr.recognition.paleo.paleoNN.PaleoTrainer.java

License:BSD License

/**
 * Converts ARFF instances into a format readable by LibSVM and outputs it
 * to file/*  w ww. j av  a  2  s  .  c o  m*/
 * 
 * @param filename
 *            output file name
 * @param data
 *            ARFF instances (the data)
 * @throws IOException
 *             if output fails
 */
public static void libSVMToFile(String filename, Instances data) throws IOException {
    BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        double label = inst.value(inst.numValues() - 1);
        writer.write(label + "\t");
        for (int a = 0; a < inst.numValues() - 1; a++) {
            double value = inst.value(a);
            if (Double.isInfinite(value) || Double.isNaN(value))
                value = mean(data.attributeToDoubleArray(a));
            writer.write((a + 1) + ":" + value);
            if (a < inst.numValues() - 2)
                writer.write(" ");
        }
        writer.newLine();
    }
}

From source file:org.stream_gpu.float_knn.float_search.EuclideanDistance.java

License:Open Source License

/**
 * Checks whether the instance's value in the given dimension is less than
 * or equal to a reference value.
 *
 * @param instance    the instance the value is read from
 * @param dim    the dimension (attribute index) of the value
 * @param value    the value to compare with
 * @return       true if the instance's value is smaller than or equal to value
 */
public boolean valueIsSmallerEqual(Instance instance, int dim, float value) {
    double actual = instance.value(dim);
    return actual <= value;
}

From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java

License:Open Source License

/**
 * Initializes the ranges from a first instance.
 * For every attribute that is present, low and high are both set to the
 * instance's value and width to zero. For a missing attribute, low is set
 * to positive infinity, high to negative infinity and width to positive
 * infinity.
 *
 * @param instance    the new instance
 * @param numAtt    number of attributes in the model
 * @param ranges    low, high and width values for all attributes
 */
public void updateRangesFirst(Instance instance, int numAtt, float[][] ranges) {
    for (int att = 0; att < numAtt; att++) {
        if (instance.isMissing(att)) {
            // no value yet: store sentinels for this attribute
            ranges[att][R_MIN] = Float.POSITIVE_INFINITY;
            ranges[att][R_MAX] = Float.NEGATIVE_INFINITY;
            ranges[att][R_WIDTH] = Float.POSITIVE_INFINITY;
            continue;
        }
        float first = (float) instance.value(att);
        ranges[att][R_MIN] = first;
        ranges[att][R_MAX] = first;
        ranges[att][R_WIDTH] = 0.0F;
    }
}

From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java

License:Open Source License

/**
 * Updates the minimum and maximum and width values for all the attributes
 * based on a new instance./*from   w  w  w.  j av a  2s  .  c om*/
 * 
 * @param instance    the new instance
 * @param numAtt    number of attributes in the model
 * @param ranges    low, high and width values for all attributes
 */
public void updateRanges(Instance instance, int numAtt, float[][] ranges) {
    // updateRangesFirst must have been called on ranges
    for (int j = 0; j < numAtt; j++) {
        float value = (float) instance.value(j);
        if (!instance.isMissing(j)) {
            if (value < ranges[j][R_MIN]) {
                ranges[j][R_MIN] = value;
                ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];
                if (value > ranges[j][R_MAX]) { //if this is the first value that is
                    ranges[j][R_MAX] = value; //not missing. The,0
                    ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];
                }
            } else {
                if (value > ranges[j][R_MAX]) {
                    ranges[j][R_MAX] = value;
                    ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];
                }
            }
        }
    }
}

From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java

License:Open Source License

/**
 * Updates the ranges given a new instance. Missing values leave the
 * corresponding range untouched.
 * <p>
 * The minimum and the maximum are checked independently. This matters when
 * a range still holds the infinite sentinels that updateRangesFirst stores
 * for a missing value: the first real value is then both below the minimum
 * and above the maximum, and both bounds must be set so that the width
 * becomes finite.
 *
 * @param instance    the new instance
 * @param ranges    low, high and width values for all attributes
 * @return      the updated ranges
 */
public float[][] updateRanges(Instance instance, float[][] ranges) {
    // updateRangesFirst must have been called on ranges
    for (int j = 0; j < ranges.length; j++) {
        float value = (float) instance.value(j);
        if (instance.isMissing(j)) {
            continue;
        }

        boolean changed = false;
        if (value < ranges[j][R_MIN]) {
            ranges[j][R_MIN] = value;
            changed = true;
        }
        // Compare the same float that gets stored, not the wider double.
        if (value > ranges[j][R_MAX]) {
            ranges[j][R_MAX] = value;
            changed = true;
        }
        if (changed) {
            ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN];
        }
    }

    return ranges;
}

From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java

License:Open Source License

/**
 * Test if an instance is within the given ranges.
 * /*from  w w w . j  a va  2s .  c  om*/
 * @param instance    the instance
 * @param ranges    the ranges the instance is tested to be in
 * @return true    if instance is within the ranges
 */
public boolean inRanges(Instance instance, float[][] ranges) {
    boolean isIn = true;

    // updateRangesFirst must have been called on ranges
    for (int j = 0; isIn && (j < ranges.length); j++) {
        if (!instance.isMissing(j)) {
            float value = (float) instance.value(j);
            isIn = value <= ranges[j][R_MAX];
            if (isIn)
                isIn = value >= ranges[j][R_MIN];
        }
    }

    return isIn;
}

From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.OutlierHandler.java

License:Apache License

/**
 * Replaces the detected outlier and extreme values.
 * <p>
 * Loads {@code OriginalDataSet.csv} from {@code BASE_DIR}, runs it through
 * this filter, and overwrites every numeric, non-class value flagged by
 * {@code isOutlier} or {@code isExtremeValue} with the mean of the other
 * values in the same column, rounded to two decimals. The last two
 * attributes of the filtered data are then removed and the result is
 * passed to {@code saveConsistentData}.
 *
 * @throws Exception the exception
 */
public void replaceOutliers() throws Exception {

    final int NON_NUMERIC = -1;

    String inputFile = BASE_DIR + "OriginalDataSet.csv";

    // load CSV file and apply this filter to it
    CSVLoader fileLoader = new CSVLoader();
    fileLoader.setSource(new File(inputFile));
    Instances inputData = fileLoader.getDataSet();
    this.setInputFormat(inputData);
    Instances outputData = Filter.useFilter(inputData, this);

    int numInstances = outputData.numInstances();
    int numAttributes = outputData.numAttributes();

    // select every attribute, then mask out the ones that must be skipped
    Range allAttributes = new Range("first-last");
    allAttributes.setUpper(outputData.numAttributes() - 1);
    int[] selected = allAttributes.getSelection();

    for (int i = 0; i < selected.length; i++) {
        // the class attribute and non-numeric attributes are ignored
        if (selected[i] == outputData.classIndex()
                || !outputData.attribute(selected[i]).isNumeric()) {
            selected[i] = NON_NUMERIC;
        }
    }

    for (int row = 0; row < numInstances; row++) {
        // access instance
        Instance current = outputData.instance(row);

        for (int col = 0; col < numAttributes; col++) {
            int selectedIndex = selected[col];

            // non-numeric attribute?
            if (selectedIndex == NON_NUMERIC) {
                continue;
            }

            // detect the outlier values using Interquartile approach
            if (this.isOutlier(current, selectedIndex)) {
                double outlierValue = current.value(col);
                double[] column = outputData.attributeToDoubleArray(col);
                double replacedValue = roundedMeanWithout(column, outlierValue);

                // replace the outliers with attribute mean values
                outputData.instance(row).setValue(col, replacedValue);
            }

            // extreme value?
            if (this.isExtremeValue(current, selectedIndex)) {
                double extremeValue = current.value(col);
                double[] column = outputData.attributeToDoubleArray(col);
                double replacedValue = roundedMeanWithout(column, extremeValue);

                outputData.instance(row).setValue(col, replacedValue);
            }
        }
    }

    // drop the last two attributes before saving
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);

    saveConsistentData(inputFile, outputData);

}

/**
 * Computes the mean of a column with one value left out, rounded to two
 * decimals.
 *
 * @param values the column values, including the one to leave out
 * @param excluded the value subtracted from the column sum
 * @return (sum of values - excluded) / (values.length - 1), rounded to two decimals
 */
private double roundedMeanWithout(double[] values, double excluded) {
    double sum = 0.0;
    for (double v : values) {
        sum = sum + v;
    }
    sum = sum - excluded;
    double mean = sum / (values.length - 1);
    return Math.round(mean * 100D) / 100D;
}

From source file:org.wikipedia.miner.annotation.Disambiguator.java

License:Open Source License

/**
 * Re-weights the training instances by the value of attribute 3.
 * Instances whose value is 0 receive weight {@code 0.5 / p}, all others
 * {@code 0.5 / (1 - p)}, where {@code p} is the fraction of instances
 * whose value is 0.
 */
@SuppressWarnings("unchecked")
private void weightTrainingInstances() {

    double positiveInstances = 0;
    double negativeInstances = 0;

    // first pass: count both groups
    for (Enumeration<Instance> en = trainingData.enumerateInstances(); en.hasMoreElements();) {
        Instance inst = en.nextElement();
        if (inst.value(3) == 0) {
            positiveInstances++;
        } else {
            negativeInstances++;
        }
    }

    double p = positiveInstances / (positiveInstances + negativeInstances);
    double positiveWeight = 0.5 * (1.0 / p);
    double negativeWeight = 0.5 * (1.0 / (1 - p));

    // second pass: assign the weight of the group each instance belongs to
    for (Enumeration<Instance> en = trainingData.enumerateInstances(); en.hasMoreElements();) {
        Instance inst = en.nextElement();
        inst.setWeight(inst.value(3) == 0 ? positiveWeight : negativeWeight);
    }

}

From source file:org.wikipedia.miner.annotation.weighting.LinkDetector.java

License:Open Source License

/**
 * Re-weights the training instances by the value of the last attribute in
 * {@code attributes}. Instances whose value is 0 receive weight
 * {@code 0.5 / p}, all others {@code 0.5 / (1 - p)}, where {@code p} is
 * the fraction of instances whose value is 0. The two fractions are
 * printed to standard output.
 */
@SuppressWarnings("unchecked")
private void weightTrainingInstances() {

    double positiveInstances = 0;
    double negativeInstances = 0;

    // first pass: count both groups
    for (Enumeration<Instance> en = trainingData.enumerateInstances(); en.hasMoreElements();) {
        Instance inst = en.nextElement();
        if (inst.value(attributes.size() - 1) == 0) {
            positiveInstances++;
        } else {
            negativeInstances++;
        }
    }

    double p = positiveInstances / (positiveInstances + negativeInstances);

    System.out.println("stats: positive=" + p + ", negative=" + (1 - p));

    double positiveWeight = 0.5 * (1.0 / p);
    double negativeWeight = 0.5 * (1.0 / (1 - p));

    // second pass: assign the weight of the group each instance belongs to
    for (Enumeration<Instance> en = trainingData.enumerateInstances(); en.hasMoreElements();) {
        Instance inst = en.nextElement();
        boolean isZero = inst.value(attributes.size() - 1) == 0;
        inst.setWeight(isZero ? positiveWeight : negativeWeight);
    }
}