List of usage examples for weka.core.Instance.value
public double value(Attribute att);
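Before the examples, a minimal sketch of the call itself (assuming Weka 3.7+, where instances are created via DenseInstance; the dataset and attribute names are illustrative, not from any of the projects below). Both value(Attribute) and value(int) return the instance's internal double representation:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class ValueDemo {
    public static void main(String[] args) {
        // Two numeric attributes
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("height"));
        attrs.add(new Attribute("weight"));
        Instances data = new Instances("demo", attrs, 1);

        Instance inst = new DenseInstance(2);
        inst.setDataset(data); // gives the instance access to attribute information
        inst.setValue(0, 1.80);
        inst.setValue(1, 75.0);

        // Same internal value, accessed by Attribute or by index
        System.out.println(inst.value(data.attribute("height"))); // 1.8
        System.out.println(inst.value(0));                        // 1.8
    }
}

For a nominal attribute, the returned double is the index of the value in the attribute's list of possible values, not a number parsed from the label.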
From source file:kea.NumbersFilter.java
License:Open Source License
/**
 * Converts an instance. A phrase boundary is inserted where
 * a number is found.
 */
private void convertInstance(Instance instance) throws Exception {
    double[] instVals = new double[instance.numAttributes()];
    for (int i = 0; i < instance.numAttributes(); i++) {
        if ((!instance.attribute(i).isString()) || instance.isMissing(i)) {
            instVals[i] = instance.value(i);
        } else {
            String str = instance.stringValue(i);
            StringBuffer resultStr = new StringBuffer();
            StringTokenizer tok = new StringTokenizer(str, " \t\n", true);
            while (tok.hasMoreTokens()) {
                String token = tok.nextToken();
                // Everything that doesn't contain at least
                // one letter is considered to be a number
                boolean isNumber = true;
                for (int j = 0; j < token.length(); j++) {
                    if (Character.isLetter(token.charAt(j))) {
                        isNumber = false;
                        break;
                    }
                }
                if (!isNumber) {
                    resultStr.append(token);
                } else {
                    if (token.equals(" ") || token.equals("\t") || token.equals("\n")) {
                        resultStr.append(token);
                    } else {
                        // Replace the number with a phrase boundary
                        resultStr.append(" \n ");
                    }
                }
            }
            int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString());
            instVals[i] = (double) index;
        }
    }
    // Old Weka API (pre-3.7): Instance was a concrete class
    Instance inst = new Instance(instance.weight(), instVals);
    inst.setDataset(getOutputFormat());
    push(inst);
}
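As with other Weka filters, the conversion above would be driven through the standard Filter contract; a minimal usage sketch (assuming KEA's NumbersFilter follows that contract, with data being Instances that contain string attributes):

NumbersFilter filter = new NumbersFilter();
filter.setInputFormat(data);
Instances converted = Filter.useFilter(data, filter);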
From source file:kmeans.MyKMeans.java
void updateCentroidForNumeric(int numCentroid, int numAttr) {
    List<Integer> listInst = listClusteredInstance.get(numCentroid);
    Attribute attr = dataSource.attribute(numAttr);
    if (listInst.isEmpty()) {
        return; // guard against division by zero for an empty cluster
    }
    // Average the attribute over all instances assigned to this centroid
    double sum = 0;
    for (int i = 0; i < listInst.size(); i++) {
        Instance inst = dataSource.get(listInst.get(i));
        sum += inst.value(attr);
    }
    double newValue = sum / listInst.size();
    Instance tempCentroid = centroid.get(numCentroid);
    tempCentroid.setValue(attr, newValue);
    centroid.set(numCentroid, tempCentroid);
}
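The source presumably handles nominal attributes separately; a hypothetical counterpart (not from MyKMeans) would replace the mean with the mode, using the fact that Instance.value returns a nominal value's index as a double:

void updateCentroidForNominal(int numCentroid, int numAttr) {
    List<Integer> listInst = listClusteredInstance.get(numCentroid);
    Attribute attr = dataSource.attribute(numAttr);
    if (listInst.isEmpty()) {
        return;
    }
    // Count occurrences of each nominal index within the cluster
    int[] counts = new int[attr.numValues()];
    for (int i = 0; i < listInst.size(); i++) {
        counts[(int) dataSource.get(listInst.get(i)).value(attr)]++;
    }
    // The centroid takes the most frequent value (the mode)
    int mode = 0;
    for (int v = 1; v < counts.length; v++) {
        if (counts[v] > counts[mode]) {
            mode = v;
        }
    }
    centroid.get(numCentroid).setValue(attr, mode);
}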
From source file:knn.KNNClassifier.java
public static double getClassification(List<Instance> instances) {
    // Majority vote over the class values of the given instances
    int index = instances.get(0).classIndex();
    HashMap<Double, Integer> counts = new HashMap<>();
    int maxCount = 0;
    double maxValue = 0;
    // Tally each instance's class value (for a nominal class,
    // value(index) is the index of the class label)
    for (Instance instance : instances) {
        double val = instance.value(index);
        if (!counts.containsKey(val)) {
            counts.put(val, 1);
        } else {
            counts.put(val, counts.get(val) + 1);
        }
    }
    // Return the class value with the highest count
    for (Entry<Double, Integer> entry : counts.entrySet()) {
        if (entry.getValue() > maxCount) {
            maxCount = entry.getValue();
            maxValue = entry.getKey();
        }
    }
    return maxValue;
}
From source file:knn.KNNClassifier.java
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {
    double distance = 0;
    // Iterate over shared attributes, skipping the last one (the class attribute)
    for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) {
        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            // Squared difference for numeric attributes
            distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2);
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
            // Fixed penalty for mismatched non-numeric attributes; equal values add nothing
            distance += 5;
        }
    }
    return distance;
}
From source file:knn.KNNClassifier.java
double ManhattanDistance(Instance instanceLHS, Instance instanceRHS) {
    double distance = 0;
    // Iterate over shared attributes, skipping the last one (the class attribute)
    for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) {
        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            // Absolute difference for numeric attributes
            distance += abs(instanceLHS.value(i) - instanceRHS.value(i));
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
            // Fixed penalty for mismatched non-numeric attributes; equal values add nothing
            distance += 5;
        }
    }
    return distance;
}
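These distance functions are typically paired with a neighbor search; a hypothetical helper (not from the source) that selects the k nearest training instances, whose result can be fed to getClassification above for the majority vote:

List<Instance> kNearest(Instance query, Instances train, int k) {
    List<Instance> all = new ArrayList<>();
    for (int i = 0; i < train.numInstances(); i++) {
        all.add(train.instance(i));
    }
    // Sort ascending by distance to the query
    all.sort((a, b) -> Double.compare(EuclideanDistance(query, a),
                                      EuclideanDistance(query, b)));
    return all.subList(0, Math.min(k, all.size()));
}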
From source file:lineage.AAFClusterer.java
License:Open Source License
/**
 * K-Means Clustering
 * @param data - matrix of observations (numObs x numFeatures)
 * @param k - number of clusters
 */
public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);
    // uses Euclidean distance by default
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setPreserveInstancesOrder(true);
        clusterer.setNumClusters(k);
        clusterer.buildClusterer(ds);

        // cluster centers
        Instances centers = clusterer.getClusterCentroids();
        Cluster[] clusters = new Cluster[centers.numInstances()];
        for (int i = 0; i < centers.numInstances(); i++) {
            Instance inst = centers.instance(i);
            double[] mean = new double[inst.numAttributes()];
            for (int j = 0; j < mean.length; j++) {
                mean[j] = inst.value(j);
            }
            clusters[i] = new Cluster(mean, i);
        }

        // cluster members
        int[] assignments = clusterer.getAssignments();
        for (int i = 0; i < assignments.length; i++) {
            clusters[assignments[i]].addMember(i);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}
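The helper convertMatrixToWeka is not shown in this excerpt; a plausible sketch (assuming Weka 3.7+ and unit instance weights; the attribute names are made up) that wraps the observation matrix in an Instances object:

private static Instances convertMatrixToWeka(double[][] data, int numObs, int numFeatures) {
    // One numeric attribute per feature column
    ArrayList<Attribute> attrs = new ArrayList<Attribute>();
    for (int j = 0; j < numFeatures; j++) {
        attrs.add(new Attribute("feature" + j));
    }
    Instances ds = new Instances("observations", attrs, numObs);
    for (int i = 0; i < numObs; i++) {
        ds.add(new DenseInstance(1.0, data[i]));
    }
    return ds;
}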
From source file:lineage.AAFClusterer.java
License:Open Source License
/**
 * Expectation-Maximization clustering.
 * The number of clusters is chosen automatically by EM via cross-validation.
 * @param data - matrix of observations (numObs x numFeatures)
 */
public Cluster[] em(double[][] data, int numObs, int numFeatures) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);
    EM clusterer = new EM();
    try {
        clusterer.buildClusterer(ds);
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(new Instances(ds));
        int numClusters = eval.getNumClusters();
        Cluster[] clusters = new Cluster[numClusters];
        // Accumulate per-cluster sums to compute the centroids
        double[][] clusterCentroids = new double[numClusters][numFeatures];
        int[] clusterCount = new int[numClusters];
        double[] assignments = eval.getClusterAssignments();
        for (int i = 0; i < ds.numInstances(); i++) {
            Instance inst = ds.instance(i);
            int clusterId = (int) assignments[i];
            for (int j = 0; j < numFeatures; j++) {
                clusterCentroids[clusterId][j] += inst.value(j);
            }
            clusterCount[clusterId]++;
        }
        for (int i = 0; i < numClusters; i++) {
            double[] mean = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                mean[j] = clusterCentroids[i][j] / clusterCount[i];
            }
            clusters[i] = new Cluster(mean, i);
        }
        // cluster members & std dev
        double[][] clusterStdDev = new double[numClusters][numFeatures];
        for (int i = 0; i < ds.numInstances(); i++) {
            int clusterId = (int) assignments[i];
            clusters[clusterId].addMember(i);
            for (int j = 0; j < numFeatures; j++) {
                clusterStdDev[clusterId][j] += Math
                        .pow(ds.instance(i).value(j) - clusters[clusterId].getCentroid()[j], 2);
            }
        }
        for (int i = 0; i < numClusters; i++) {
            double[] dev = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                dev[j] = Math.sqrt(clusterStdDev[i][j] / clusterCount[i]);
            }
            clusters[i].setStdDev(dev);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}
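EM already fits a Gaussian per cluster and attribute, so the means and standard deviations computed manually above can also be read from the fitted model; a sketch (assuming Weka's EM.getClusterModelsNumericAtts(), which returns per-cluster, per-attribute distribution parameters):

// [cluster][attribute][0] = mean, [cluster][attribute][1] = std. dev.
double[][][] model = clusterer.getClusterModelsNumericAtts();
for (int c = 0; c < model.length; c++) {
    for (int j = 0; j < numFeatures; j++) {
        System.out.printf("cluster %d, feature %d: mean=%.4f, sd=%.4f%n",
                c, j, model[c][j][0], model[c][j][1]);
    }
}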
From source file:LogReg.Logistic.java
License:Open Source License
/**
 * Builds the classifier.
 *
 * @param train the training data to be used for generating the classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class
    train = new Instances(train);
    train.deleteWithMissingClass();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // Save the structure for printing the model
    m_structure = new Instances(train, 0);

    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nK = m_NumClasses - 1; // Only K-1 class labels needed
    int nR = m_NumPredictors = train.numAttributes() - 1;
    int nC = train.numInstances();

    m_Data = new double[nC][nR + 1]; // Data values
    int[] Y = new int[nC]; // Class labels
    double[] xMean = new double[nR + 1]; // Attribute means
    xSD = new double[nR + 1]; // Attribute stddev's
    double[] sY = new double[nK + 1]; // Instance counts per class
    double[] weights = new double[nC]; // Weights of instances
    double totWeights = 0; // Total weight of the instances
    m_Par = new double[nR + 1][nK]; // Optimized parameter values

    if (m_Debug) {
        System.out.println("Extracting data...");
    }

    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        Y[i] = (int) current.classValue(); // Class value starts from 0
        weights[i] = current.weight(); // Dealing with weights
        totWeights += weights[i];

        m_Data[i][0] = 1;
        int j = 1;
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }

        // Class count
        sY[Y[i]]++;
    }

    if ((totWeights <= 1) && (nC > 1))
        throw new Exception("Sum of weights of instances less than 1, please reweight!");

    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        if (totWeights > 1)
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        else
            xSD[j] = 0;
    }

    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++)
            System.out.println(sY[m] + " cases have class " + m);
        System.out.println("\n Variable     Avg       SD    ");
        for (int j = 1; j <= nR; j++)
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4)
                    + Utils.doubleToString(xSD[j], 10, 4));
    }

    // Normalise input data
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }

    if (m_Debug) {
        System.out.println("\nIteration History...");
    }

    double x[] = new double[(nR + 1) * nK];
    double[][] b = new double[2][x.length]; // Boundary constraints, N/A here

    // Initialize
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }

    OptEng opt = new OptEng();
    opt.setDebug(m_Debug);
    opt.setWeights(weights);
    opt.setClassLabels(Y);

    if (m_MaxIts == -1) { // Search until convergence
        x = opt.findArgmin(x, b);
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println(" -------------<Converged>--------------");
    } else {
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) // Not enough iterations, but use the current values
            x = opt.getVarbValues();
    }

    m_LL = -opt.getMinFunction(); // Log-likelihood

    // Don't need data matrix anymore
    m_Data = null;

    // Convert coefficients back to non-normalized attribute units
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            if (xSD[j] != 0) {
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}