Example usage for weka.core Instance value

List of usage examples for weka.core Instance value

Introduction

On this page you can find usage examples for the value method of weka.core.Instance.

Prototype

public double value(Attribute att);

Source Link

Document

Returns an instance's attribute value in internal format.

Usage

From source file:Classifier.supervised.LinearRegression.java

License:Open Source License

/**
 * Evaluates a fitted regression model on a single instance.
 *
 * The coefficients are consumed in attribute order, skipping the class
 * attribute and every attribute that is not selected; the coefficient that
 * follows the last consumed one is added as the constant term.
 *
 * @param transformedInstance the input instance
 * @param selectedAttributes flags indicating which attributes are part of
 * the regression model
 * @param coefficients the model coefficients, constant term last
 * @return the regression value for the instance.
 * @throws Exception if the class attribute of the input instance
 * is not assigned
 */
protected double regressionPrediction(Instance transformedInstance, boolean[] selectedAttributes,
        double[] coefficients) throws Exception {

    double prediction = 0;
    int coeffPos = 0;
    for (int attr = 0; attr < transformedInstance.numAttributes(); attr++) {
        // The class attribute and unselected attributes carry no coefficient.
        if ((attr == m_ClassIndex) || !selectedAttributes[attr]) {
            continue;
        }
        prediction += coefficients[coeffPos] * transformedInstance.value(attr);
        coeffPos++;
    }

    // The slot after the last attribute coefficient holds the constant term.
    prediction += coefficients[coeffPos];
    return prediction;
}

From source file:Classifier.supervised.LinearRegression.java

License:Open Source License

/**
 * Calculate a linear regression using the selected attributes.
 *
 * The selected (non-class) attributes of m_TransformedData are centred with
 * m_Means (and divided by m_StdDevs unless m_checksTurnedOff is set), every
 * row is scaled by the square root of its instance weight, and the resulting
 * system is solved with the ridge constant m_Ridge. The coefficients are then
 * converted back to the original attribute scale.
 *
 * @param selectedAttributes an array of booleans where each element
 * is true if the corresponding attribute should be included in the
 * regression.
 * @return an array of coefficients for the linear regression model: one per
 * selected attribute (in attribute order) followed by the constant term.
 * @throws Exception if an error occurred during the regression.
 */
protected double[] doRegression(boolean[] selectedAttributes) throws Exception {

    // Optional trace of the attribute-selection flags.
    if (m_Debug) {
        System.out.print("doRegression(");
        for (int i = 0; i < selectedAttributes.length; i++) {
            System.out.print(" " + selectedAttributes[i]);
        }
        System.out.println(" )");
    }
    // Count the selected attributes; this fixes the number of matrix columns.
    int numAttributes = 0;
    for (int i = 0; i < selectedAttributes.length; i++) {
        if (selectedAttributes[i]) {
            numAttributes++;
        }
    }

    // Check whether there are still attributes left
    Matrix independent = null, dependent = null;
    if (numAttributes > 0) {
        independent = new Matrix(m_TransformedData.numInstances(), numAttributes);
        dependent = new Matrix(m_TransformedData.numInstances(), 1);
        for (int i = 0; i < m_TransformedData.numInstances(); i++) {
            Instance inst = m_TransformedData.instance(i);
            // Rows are multiplied by sqrt(weight) on both sides of the system.
            double sqrt_weight = Math.sqrt(inst.weight());
            int column = 0;
            for (int j = 0; j < m_TransformedData.numAttributes(); j++) {
                if (j == m_ClassIndex) {
                    dependent.set(i, 0, inst.classValue() * sqrt_weight);
                } else {
                    if (selectedAttributes[j]) {
                        // Centre the input on the attribute mean.
                        double value = inst.value(j) - m_Means[j];

                        // We only need to do this if we want to
                        // scale the input
                        if (!m_checksTurnedOff) {
                            value /= m_StdDevs[j];
                        }
                        independent.set(i, column, value * sqrt_weight);
                        column++;
                    }
                }
            }
        }
    }

    // Compute coefficients (note that we have to treat the
    // intercept separately so that it doesn't get affected
    // by the ridge constant.)
    double[] coefficients = new double[numAttributes + 1];
    if (numAttributes > 0) {
        double[] coeffsWithoutIntercept = independent.regression(dependent, m_Ridge).getCoefficients();
        System.arraycopy(coeffsWithoutIntercept, 0, coefficients, 0, numAttributes);
    }
    // The last slot is the constant term; it starts out as m_ClassMean.
    coefficients[numAttributes] = m_ClassMean;

    // Convert coefficients into original scale
    // NOTE(review): this loop skips m_TransformedData.classIndex() while the
    // matrix-building loop above skips m_ClassIndex -- presumably the same
    // index; confirm.
    int column = 0;
    for (int i = 0; i < m_TransformedData.numAttributes(); i++) {
        if ((i != m_TransformedData.classIndex()) && (selectedAttributes[i])) {

            // We only need to do this if we have scaled the
            // input.
            if (!m_checksTurnedOff) {
                coefficients[column] /= m_StdDevs[i];
            }

            // We have centred the input
            coefficients[coefficients.length - 1] -= coefficients[column] * m_Means[i];
            column++;
        }
    }

    return coefficients;
}

From source file:Classifiers.BRkNN.java

License:Open Source License

/**
 * Select the best value for k by hold-one-out cross-validation. Hamming
 * Loss is minimized.
 *
 * For every training instance the cvMaxK nearest neighbours are fetched
 * from lnn; the label bipartition is then predicted for k = cvMaxK down to
 * 1 (pruning the neighbour set by one after each step) and the per-instance
 * Hamming loss is accumulated for each k. The k with the lowest accumulated
 * loss (the smallest such k on ties) is stored in numOfNeighbors.
 *
 * Any exception raised during the search is rethrown as an {@link Error}
 * that carries the original exception as its cause, so the stack trace of
 * the real failure is not lost.
 *
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
private void crossValidate() throws Exception {
    try {
        // Accumulated Hamming loss for each k; slot k - 1 belongs to k.
        // A new double[] is already zero-filled.
        double[] hammingLoss = new double[cvMaxK];

        Instances dataSet = train;
        Instance instance; // the hold out instance
        Instances neighbours; // the neighboring instances
        double[] origDistances, convertedDistances;
        for (int i = 0; i < dataSet.numInstances(); i++) {
            if (getDebug() && (i % 50 == 0)) {
                debug("Cross validating " + i + "/" + dataSet.numInstances() + "\r");
            }
            instance = dataSet.instance(i);
            neighbours = lnn.kNearestNeighbours(instance, cvMaxK);
            origDistances = lnn.getDistances();

            // gathering the true labels for the instance
            boolean[] trueLabels = new boolean[numLabels];
            for (int counter = 0; counter < numLabels; counter++) {
                int classIdx = labelIndices[counter];
                String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
                trueLabels[counter] = classValue.equals("1");
            }
            // calculate the performance metric for each different k
            for (int j = cvMaxK; j > 0; j--) {
                // getConfidences rewrites the distance array it is given,
                // so it always works on a fresh copy of the originals.
                convertedDistances = new double[origDistances.length];
                System.arraycopy(origDistances, 0, convertedDistances, 0, origDistances.length);
                double[] confidences = this.getConfidences(neighbours, convertedDistances);
                boolean[] bipartition = null;

                switch (extension) {
                case NONE: // BRknn
                    MultiLabelOutput results;
                    results = new MultiLabelOutput(confidences, 0.5);
                    bipartition = results.getBipartition();
                    break;
                case EXTA: // BRknn-a
                    bipartition = labelsFromConfidences2(confidences);
                    break;
                case EXTB: // BRknn-b
                    bipartition = labelsFromConfidences3(confidences);
                    break;
                }

                double symmetricDifference = 0; // |Y xor Z|
                for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) {
                    boolean actual = trueLabels[labelIndex];
                    boolean predicted = bipartition[labelIndex];

                    if (predicted != actual) {
                        symmetricDifference++;
                    }
                }
                hammingLoss[j - 1] += (symmetricDifference / numLabels);

                // Shrink the neighbour set for the next (smaller) k.
                neighbours = new IBk().pruneToK(neighbours, convertedDistances, j - 1);
            }
        }

        // Display the results of the cross-validation
        if (getDebug()) {
            for (int i = cvMaxK; i > 0; i--) {
                debug("Hold-one-out performance of " + (i) + " neighbors ");
                debug("(Hamming Loss) = " + hammingLoss[i - 1] / dataSet.numInstances());
            }
        }

        // Check through the performance stats and select the best
        // k value (or the lowest k if more than one best)
        double bestPerformance = Double.NaN;
        int bestK = 1;
        for (int i = 0; i < cvMaxK; i++) {
            if (Double.isNaN(bestPerformance) || (bestPerformance > hammingLoss[i])) {
                bestPerformance = hammingLoss[i];
                bestK = i + 1;
            }
        }
        numOfNeighbors = bestK;
        if (getDebug()) {
            System.err.println("Selected k = " + bestK);
        }

    } catch (Exception ex) {
        // Keep the original exception as the cause instead of only its message.
        throw new Error("Couldn't optimize by cross-validation: " + ex.getMessage(), ex);
    }
}

From source file:Classifiers.BRkNN.java

License:Open Source License

/**
 * Computes a confidence for every label from the given neighbouring
 * instances.
 *
 * Side effects: the entries of {@code distances} that correspond to
 * neighbours are overwritten in place, and {@code avgPredictedLabels} is
 * set to the rounded ratio of the accumulated label weight to the total
 * weight.
 *
 * @param neighbours
 *            the list of nearest neighboring instances
 * @param distances
 *            the distances of the neighbors (modified in place)
 * @return the confidences of the labels
 */
private double[] getConfidences(Instances neighbours, double[] distances) {
    double[] labelConfidences = new double[numLabels];
    double labelWeightSum = 0;

    // Every label starts with a small correction term rather than zero.
    for (int label = 0; label < numLabels; label++) {
        labelConfidences[label] = 1.0 / Math.max(1, train.numInstances());
    }
    double weightTotal = (double) numLabels / Math.max(1, train.numInstances());

    for (int n = 0; n < neighbours.numInstances(); n++) {
        Instance neighbour = neighbours.instance(n);

        // Rescale the stored distance by the number of non-label attributes.
        double squared = distances[n] * distances[n];
        distances[n] = squared;
        distances[n] = Math.sqrt(squared / (train.numAttributes() - numLabels));

        double neighbourWeight = 1.0;
        neighbourWeight *= neighbour.weight();

        // Collect the weight of this neighbour for every label it carries.
        for (int label = 0; label < numLabels; label++) {
            int labelAttr = labelIndices[label];
            String labelText = neighbour.attribute(labelAttr).value((int) neighbour.value(labelAttr));
            double labelValue = Double.parseDouble(labelText);
            if (Utils.eq(labelValue, 1.0)) {
                labelConfidences[label] += neighbourWeight;
                labelWeightSum += neighbourWeight;
            }
        }
        weightTotal += neighbourWeight;
    }

    avgPredictedLabels = (int) Math.round(labelWeightSum / weightTotal);

    // Normalise distribution
    if (weightTotal > 0) {
        Utils.normalize(labelConfidences, weightTotal);
    }
    return labelConfidences;
}

From source file:classify.Classifier.java

/**
 * Prints missing-value statistics for a data set to standard output: the
 * total number of missing values, the number of rows that have at least one
 * missing value, and a histogram of how many rows have exactly i missing
 * attributes.
 *
 * Only the first 15 attributes are inspected, or all of them when the data
 * set has fewer than 15. Missing values are detected with
 * {@code isMissing(j)} instead of comparing string renderings against
 * "?" / "NaN", so an attribute can no longer inherit the result of the
 * previously inspected attribute.
 *
 * @param data the data set to analyse
 */
public static void missingValuesRows(Instances data) {
    // Never look past the attributes the data set actually has.
    final int inspectedAttributes = Math.min(15, data.numAttributes());
    final int rowCount = data.numInstances();

    // A row can miss anywhere from 0 to inspectedAttributes values, so the
    // histogram needs inspectedAttributes + 1 slots.
    int[] frequency = new int[inspectedAttributes + 1];
    int missValues = 0;
    int numRows = 0;

    for (int i = 0; i < rowCount; i++) {
        Instance example = data.instance(i);
        int missingInRow = 0;
        for (int j = 0; j < inspectedAttributes; j++) {
            if (example.isMissing(j)) {
                missingInRow++;
            }
        }
        missValues += missingInRow;
        frequency[missingInRow]++;
        if (missingInRow > 0) {
            numRows++;
        }
    }

    System.out.println("Number of Missing Values: " + missValues);
    System.out.println("Number of rows with missing values: " + numRows);
    System.out.println("Number of missing attributes per row:");
    for (int i = 0; i <= inspectedAttributes; i++) {
        System.out.println(i + ": " + frequency[i]);
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** This function divides every attribute value in an instance by
 *  the instance weight -- useful to find the mean of a cluster in
 *  Euclidean space /*w  w  w  .  j  a v a  2  s.  c  om*/
 *  @param inst Instance passed in for normalization (destructive update)
 */
public static void normalizeByWeight(Instance inst) {
    double weight = inst.weight();
    if (inst instanceof SparseInstance) {
        for (int i = 0; i < inst.numValues(); i++) {
            inst.setValueSparse(i, inst.valueSparse(i) / weight);
        }
    } else if (!(inst instanceof SparseInstance)) {
        for (int i = 0; i < inst.numAttributes(); i++) {
            inst.setValue(i, inst.value(i) / weight);
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/**
 * Adds two instances attribute by attribute and returns the result as a new
 * instance whose weight is the sum of both weights. Both inputs must be of
 * the same kind: either both SparseInstance or both non-sparse; the result
 * is of that kind as well and is attached to the given data set.
 *
 * @param inst1 the first instance
 * @param inst2 the second instance
 * @param m_Instances the data set the new instance is assigned to
 * @return a new instance holding the attribute-wise sum
 * @throws Exception if the attribute counts differ, or if exactly one of
 * the two instances is sparse
 */
public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    final int attributeCount = inst1.numAttributes();
    if (attributeCount != inst2.numAttributes()) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }

    final boolean firstIsSparse = inst1 instanceof SparseInstance;
    final boolean secondIsSparse = inst2 instanceof SparseInstance;
    if (firstIsSparse != secondIsSparse) {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }

    final double combinedWeight = inst1.weight() + inst2.weight();
    // Freshly allocated, so every slot starts at 0.
    final double[] sums = new double[attributeCount];

    Instance total;
    if (firstIsSparse) {
        // Scatter the entries of the first instance, then add the second's.
        for (int k = 0; k < inst1.numValues(); k++) {
            sums[inst1.index(k)] = inst1.valueSparse(k);
        }
        for (int k = 0; k < inst2.numValues(); k++) {
            sums[inst2.index(k)] += inst2.valueSparse(k);
        }
        total = new SparseInstance(combinedWeight, sums);
    } else {
        for (int k = 0; k < attributeCount; k++) {
            sums[k] = inst1.value(k) + inst2.value(k);
        }
        total = new Instance(combinedWeight, sums);
    }
    total.setDataset(m_Instances);
    return total;
}

From source file:clusterer.SimpleKMeansWithSilhouette.java

License:Open Source License

/**
 * Move the centroid to it's new coordinates. Generate the centroid
 * coordinates based on it's members (objects assigned to the cluster of the
 * centroid) and the distance function being used.
 * /* ww  w .j  a va  2 s .  co m*/
 * @param centroidIndex index of the centroid which the coordinates will be
 *          computed
 * @param members the objects that are assigned to the cluster of this
 *          centroid
 * @param updateClusterInfo if the method is supposed to update the m_Cluster
 *          arrays
 * @param addToCentroidInstances true if the method is to add the computed
 *          coordinates to the Instances holding the centroids
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo,
        boolean addToCentroidInstances) {

    double[] vals = new double[members.numAttributes()];
    double[][] nominalDists = new double[members.numAttributes()][];
    double[] weightMissing = new double[members.numAttributes()];
    double[] weightNonMissing = new double[members.numAttributes()];

    // Quickly calculate some relevant statistics 
    for (int j = 0; j < members.numAttributes(); j++) {
        if (members.attribute(j).isNominal()) {
            nominalDists[j] = new double[members.attribute(j).numValues()];
        }
    }
    for (Instance inst : members) {
        for (int j = 0; j < members.numAttributes(); j++) {
            if (inst.isMissing(j)) {
                weightMissing[j] += inst.weight();
            } else {
                weightNonMissing[j] += inst.weight();
                if (members.attribute(j).isNumeric()) {
                    vals[j] += inst.weight() * inst.value(j); // Will be overwritten in Manhattan case
                } else {
                    nominalDists[j][(int) inst.value(j)] += inst.weight();
                }
            }
        }
    }
    for (int j = 0; j < members.numAttributes(); j++) {
        if (members.attribute(j).isNumeric()) {
            if (weightNonMissing[j] > 0) {
                vals[j] /= weightNonMissing[j];
            } else {
                vals[j] = Utils.missingValue();
            }
        } else {
            double max = -Double.MAX_VALUE;
            double maxIndex = -1;
            for (int i = 0; i < nominalDists[j].length; i++) {
                if (nominalDists[j][i] > max) {
                    max = nominalDists[j][i];
                    maxIndex = i;
                }
                if (max < weightMissing[j]) {
                    vals[j] = Utils.missingValue();
                } else {
                    vals[j] = maxIndex;
                }
            }
        }
    }

    if (m_DistanceFunction instanceof ManhattanDistance) {

        // Need to replace means by medians
        Instances sortedMembers = null;
        int middle = (members.numInstances() - 1) / 2;
        boolean dataIsEven = ((members.numInstances() % 2) == 0);
        if (m_PreserveOrder) {
            sortedMembers = members;
        } else {
            sortedMembers = new Instances(members);
        }
        for (int j = 0; j < members.numAttributes(); j++) {
            if ((weightNonMissing[j] > 0) && members.attribute(j).isNumeric()) {
                // singleton special case
                if (members.numInstances() == 1) {
                    vals[j] = members.instance(0).value(j);
                } else {
                    vals[j] = sortedMembers.kthSmallestValue(j, middle + 1);
                    if (dataIsEven) {
                        vals[j] = (vals[j] + sortedMembers.kthSmallestValue(j, middle + 2)) / 2;
                    }
                }
            }
        }
    }

    if (updateClusterInfo) {
        for (int j = 0; j < members.numAttributes(); j++) {
            m_ClusterMissingCounts[centroidIndex][j] = weightMissing[j];
            m_ClusterNominalCounts[centroidIndex][j] = nominalDists[j];
        }
    }

    if (addToCentroidInstances) {
        m_ClusterCentroids.add(new DenseInstance(1.0, vals));
    }

    return vals;
}

From source file:cn.edu.xjtu.dbmine.source.NaiveBayes.java

License:Open Source License

/**
 * Generates the classifier./*from  w w  w.  j  a v  a  2s.  co  m*/
 *
 * @param instances set of instances serving as training data 
 * @exception Exception if the classifier has not been generated 
 * successfully
 */
public void buildClassifier(Instances instances) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    m_NumClasses = instances.numClasses();

    // Copy the instances
    m_Instances = new Instances(instances);

    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }

    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = (Attribute) enu.nextElement();

        // If the attribute is numeric, determine the estimator 
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }

        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch (attribute.type()) {
            case Attribute.NUMERIC:
                if (m_UseKernelEstimator) {
                    m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                } else {
                    m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                }
                break;
            case Attribute.NOMINAL:
                m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                break;
            default:
                throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }

    // Compute counts
    Enumeration enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = (Instance) enumInsts.nextElement();
        updateClassifier(instance);
    }

    // Save space
    m_Instances = new Instances(m_Instances, 0);
}

From source file:cn.edu.xjtu.dbmine.source.NaiveBayes.java

License:Open Source License

/**
 * Incorporates a single training instance into the model.
 *
 * Instances without a class value are ignored. Otherwise every non-missing
 * attribute value is added, with the instance weight, to the estimator of
 * that attribute for the instance's class, and the class value itself is
 * added to the class distribution.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in
 * the model.
 */
public void updateClassifier(Instance instance) throws Exception {

    // Nothing to learn from an instance whose class is unknown.
    if (instance.classIsMissing()) {
        return;
    }

    int estimatorRow = 0;
    for (Enumeration attrs = m_Instances.enumerateAttributes(); attrs.hasMoreElements(); estimatorRow++) {
        Attribute current = (Attribute) attrs.nextElement();
        if (instance.isMissing(current)) {
            continue;
        }
        m_Distributions[estimatorRow][(int) instance.classValue()].addValue(instance.value(current),
                instance.weight());
    }
    m_ClassDistribution.addValue(instance.classValue(), instance.weight());
}