Example usage for weka.core Instance value

List of usage examples for weka.core Instance value

Introduction

On this page you can find example usages of the weka.core.Instance.value method.

Prototype

public double value(Attribute att);

Source Link

Document

Returns an instance's attribute value in internal format.

Usage

From source file:kea.NumbersFilter.java

License:Open Source License

/**
 * Converts an instance and pushes the result onto the output queue.
 *
 * <p>Non-string and missing attribute values are copied unchanged. For every
 * other (string) value, each whitespace-delimited token that contains no
 * letter is treated as a number and replaced by a phrase boundary
 * ({@code " \n "}); tokens containing a letter and the whitespace delimiters
 * themselves are kept as they are.
 *
 * @param instance the instance to convert
 * @throws Exception declared by the original signature; propagated from the
 *         calls made here
 */
private void convertInstance(Instance instance) throws Exception {

    final int attributeCount = instance.numAttributes();
    double[] converted = new double[attributeCount];

    for (int attIndex = 0; attIndex < attributeCount; attIndex++) {
        boolean rewritable = instance.attribute(attIndex).isString() && !instance.isMissing(attIndex);
        if (!rewritable) {
            // Nothing to rewrite: keep the internal value as it is.
            converted[attIndex] = instance.value(attIndex);
            continue;
        }

        StringBuilder rewritten = new StringBuilder();
        // Delimiters are returned as tokens too, so the original spacing survives.
        StringTokenizer tokens = new StringTokenizer(instance.stringValue(attIndex), " \t\n", true);
        while (tokens.hasMoreTokens()) {
            String current = tokens.nextToken();

            // A token counts as a number when none of its characters is a letter.
            boolean containsLetter = false;
            int pos = 0;
            while (pos < current.length() && !containsLetter) {
                containsLetter = Character.isLetter(current.charAt(pos));
                pos++;
            }

            boolean isDelimiter = " ".equals(current) || "\t".equals(current) || "\n".equals(current);
            if (containsLetter || isDelimiter) {
                rewritten.append(current);
            } else {
                // Numbers are replaced by a phrase boundary.
                rewritten.append(" \n ");
            }
        }

        int stringIndex = getOutputFormat().attribute(attIndex).addStringValue(rewritten.toString());
        converted[attIndex] = (double) stringIndex;
    }

    Instance result = new Instance(instance.weight(), converted);
    result.setDataset(getOutputFormat());
    push(result);
}

From source file:kmeans.MyKMeans.java

/**
 * Recomputes one numeric attribute of a centroid as the arithmetic mean of
 * that attribute over the instances currently assigned to the centroid's
 * cluster.
 *
 * <p>If the cluster has no members the centroid is left untouched: dividing
 * the (zero) sum by a zero member count would otherwise store NaN in the
 * centroid.
 *
 * @param numCentroid index of the centroid, and of its member list in
 *        {@code listClusteredInstance}
 * @param numAttr index of the attribute in {@code dataSource} to average
 */
void updateCentroidForNumeric(int numCentroid, int numAttr) {
    List<Integer> members = listClusteredInstance.get(numCentroid);
    if (members.isEmpty()) {
        // No members: keep the previous centroid value instead of writing 0/0 = NaN.
        return;
    }

    Attribute attr = dataSource.attribute(numAttr);
    double sum = 0;
    for (int instanceIndex : members) {
        sum += dataSource.get(instanceIndex).value(attr);
    }

    Instance updated = centroid.get(numCentroid);
    updated.setValue(attr, sum / members.size());
    // Written back explicitly, as the original did, in case get() hands out a copy.
    centroid.set(numCentroid, updated);
}

From source file:knn.KNNClassifier.java

/**
 * Returns the class value that occurs most often among the given instances
 * (a majority vote).
 *
 * <p>The class index is read from the first instance and used for all of
 * them, so the list must contain at least one instance. When several values
 * share the highest count, the one met first while iterating the count map
 * wins.
 *
 * @param instances the instances to vote over; must not be empty
 * @return the most frequent class value, in Weka's internal double format
 */
public static double getClassification(List<Instance> instances) {

    final int classIdx = instances.get(0).classIndex();

    // First pass: tally how many instances carry each class value.
    HashMap<Double, Integer> tally = new HashMap<>();
    for (Instance voter : instances) {
        double label = voter.value(classIdx);
        Integer seenSoFar = tally.get(label);
        tally.put(label, seenSoFar == null ? 1 : seenSoFar + 1);
    }

    // Second pass: keep the value whose tally is strictly larger than the best so far.
    double winner = 0;
    int winnerVotes = 0;
    for (Entry<Double, Integer> candidate : tally.entrySet()) {
        int votes = candidate.getValue();
        if (votes > winnerVotes) {
            winnerVotes = votes;
            winner = candidate.getKey();
        }
    }

    return winner;
}

From source file:knn.KNNClassifier.java

/**
 * Computes a squared Euclidean-style distance between two instances.
 *
 * <p>For each attribute position that is numeric in both instances, the
 * squared difference of the two values is added. For any other position a
 * fixed penalty of 5 is added, but only when the two string values differ;
 * equal values contribute nothing. (Previously the penalty was added even
 * for equal values.) No square root is taken, so the result is a sum of
 * squares plus penalties.
 *
 * <p>The last attribute of each instance is not compared (presumably it is
 * the class attribute; confirm against the callers).
 *
 * @param instanceLHS first instance
 * @param instanceRHS second instance
 * @return the accumulated distance; 0 when all compared attributes match
 */
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {

    // Penalty for a pair of non-numeric values that are not equal.
    final double mismatchPenalty = 5;

    double distance = 0;

    // Compare only positions present in both instances, excluding the last one.
    int limit = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;
    for (int i = 0; i < limit; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2);
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:knn.KNNClassifier.java

/**
 * Computes a Manhattan-style distance between two instances.
 *
 * <p>For each attribute position that is numeric in both instances, the
 * absolute difference of the two values is added. For any other position a
 * fixed penalty of 5 is added, but only when the two string values differ;
 * equal values contribute nothing. (Previously an equal pair reset the whole
 * accumulated distance to zero and the penalty was then added anyway.)
 *
 * <p>The last attribute of each instance is not compared (presumably it is
 * the class attribute; confirm against the callers).
 *
 * @param instanceLHS first instance
 * @param instanceRHS second instance
 * @return the accumulated distance; 0 when all compared attributes match
 */
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) {

    // Penalty for a pair of non-numeric values that are not equal.
    final double mismatchPenalty = 5;

    double distance = 0;

    // Compare only positions present in both instances, excluding the last one.
    int limit = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;
    for (int i = 0; i < limit; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            distance += abs(instanceLHS.value(i) - instanceRHS.value(i));
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:knnclassifier.KNNClassifier.java

/**
 * Computes a squared Euclidean-style distance between two instances.
 *
 * <p>Positions that are numeric in both instances contribute the squared
 * difference of their values. Every other position contributes a fixed
 * penalty of 5 when the two string values differ and nothing when they are
 * equal. (Previously the penalty was added even for equal values.) The sum
 * is returned without taking a square root.
 *
 * <p>The last attribute of each instance is not compared (presumably it is
 * the class attribute; confirm against the callers).
 *
 * @param instanceLHS first instance
 * @param instanceRHS second instance
 * @return the accumulated distance; 0 when all compared attributes match
 */
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {

    // Penalty for a pair of non-numeric values that are not equal.
    final double mismatchPenalty = 5;

    double distance = 0;

    // Compare only positions present in both instances, excluding the last one.
    int limit = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;
    for (int i = 0; i < limit; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2);
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:knnclassifier.KNNClassifier.java

/**
 * Computes a Manhattan-style distance between two instances.
 *
 * <p>Positions that are numeric in both instances contribute the absolute
 * difference of their values. Every other position contributes a fixed
 * penalty of 5 when the two string values differ and nothing when they are
 * equal. (Previously an equal pair reset the whole accumulated distance to
 * zero and the penalty was then added anyway.)
 *
 * <p>The last attribute of each instance is not compared (presumably it is
 * the class attribute; confirm against the callers).
 *
 * @param instanceLHS first instance
 * @param instanceRHS second instance
 * @return the accumulated distance; 0 when all compared attributes match
 */
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) {

    // Penalty for a pair of non-numeric values that are not equal.
    final double mismatchPenalty = 5;

    double distance = 0;

    // Compare only positions present in both instances, excluding the last one.
    int limit = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;
    for (int i = 0; i < limit; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            distance += abs(instanceLHS.value(i) - instanceRHS.value(i));
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:lineage.AAFClusterer.java

License:Open Source License

/**
 * K-Means clustering of a matrix of observations using Weka's SimpleKMeans.
 *
 * <p>Any exception raised while clustering is printed and the JVM is
 * terminated with exit status -1.
 *
 * @param data matrix of observations (numObs x numFeatures)
 * @param numObs number of observations, forwarded to the matrix conversion
 * @param numFeatures number of features, forwarded to the matrix conversion
 * @param k number of clusters
 * @return one cluster per centroid reported by the clusterer, each created
 *         from the centroid's attribute values and given the indices of the
 *         observations assigned to it
 */
public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) {
    Instances dataset = convertMatrixToWeka(data, numObs, numFeatures);

    // SimpleKMeans is left on its default distance function.
    SimpleKMeans kMeans = new SimpleKMeans();
    try {
        // Instance order must be preserved so getAssignments() can be used below.
        kMeans.setPreserveInstancesOrder(true);
        kMeans.setNumClusters(k);
        kMeans.buildClusterer(dataset);

        // Build one Cluster per centroid from the centroid's attribute values.
        Instances centroids = kMeans.getClusterCentroids();
        final int found = centroids.numInstances();
        Cluster[] result = new Cluster[found];
        for (int c = 0; c < found; c++) {
            Instance centre = centroids.instance(c);
            double[] mean = new double[centre.numAttributes()];
            int a = 0;
            while (a < mean.length) {
                mean[a] = centre.value(a);
                a++;
            }
            result[c] = new Cluster(mean, c);
        }

        // Register every observation index with the cluster it was assigned to.
        int[] assignedTo = kMeans.getAssignments();
        int observation = 0;
        while (observation < assignedTo.length) {
            result[assignedTo[observation]].addMember(observation);
            observation++;
        }
        return result;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}

From source file:lineage.AAFClusterer.java

License:Open Source License

/**
 * Expectation Maximization clustering of a matrix of observations using
 * Weka's EM clusterer.
 *
 * <p>The number of clusters is taken from the cluster evaluation rather than
 * supplied by the caller. For every cluster the centroid is the per-feature
 * mean of its assigned observations and the standard deviation is the square
 * root of the mean squared deviation from that centroid (divided by the
 * member count). A cluster with no assigned observations yields NaN values,
 * because both are computed as a division by a zero count.
 *
 * <p>Any exception raised while clustering is printed and the JVM is
 * terminated with exit status -1.
 *
 * @param data matrix of observations (numObs x numFeatures)
 * @param numObs number of observations, forwarded to the matrix conversion
 * @param numFeatures number of features per observation
 * @return one cluster per cluster found by the evaluation, with centroid,
 *         members and standard deviation set
 */
public Cluster[] em(double[][] data, int numObs, int numFeatures) {
    Instances dataset = convertMatrixToWeka(data, numObs, numFeatures);
    EM emClusterer = new EM();
    try {
        emClusterer.buildClusterer(dataset);
        ClusterEvaluation evaluation = new ClusterEvaluation();
        evaluation.setClusterer(emClusterer);
        evaluation.evaluateClusterer(new Instances(dataset));

        final int clusterTotal = evaluation.getNumClusters();
        final double[] assignedTo = evaluation.getClusterAssignments();
        final int instanceTotal = dataset.numInstances();

        // Pass 1: per-cluster feature sums and member counts.
        double[][] featureSums = new double[clusterTotal][numFeatures];
        int[] memberCounts = new int[clusterTotal];
        for (int row = 0; row < instanceTotal; row++) {
            Instance observation = dataset.instance(row);
            int target = (int) assignedTo[row];
            double[] sums = featureSums[target];
            for (int f = 0; f < numFeatures; f++) {
                sums[f] += observation.value(f);
            }
            memberCounts[target]++;
        }

        // Pass 2: turn the sums into means and create the clusters.
        Cluster[] result = new Cluster[clusterTotal];
        for (int c = 0; c < clusterTotal; c++) {
            double[] mean = new double[numFeatures];
            for (int f = 0; f < numFeatures; f++) {
                mean[f] = featureSums[c][f] / memberCounts[c];
            }
            result[c] = new Cluster(mean, c);
        }

        // Pass 3: register members and accumulate squared deviations from the centroid.
        double[][] squaredDeviation = new double[clusterTotal][numFeatures];
        for (int row = 0; row < instanceTotal; row++) {
            int target = (int) assignedTo[row];
            result[target].addMember(row);
            for (int f = 0; f < numFeatures; f++) {
                double delta = dataset.instance(row).value(f) - result[target].getCentroid()[f];
                squaredDeviation[target][f] += Math.pow(delta, 2);
            }
        }

        // Pass 4: standard deviation per cluster and feature.
        for (int c = 0; c < clusterTotal; c++) {
            double[] spread = new double[numFeatures];
            for (int f = 0; f < numFeatures; f++) {
                spread[f] = Math.sqrt(squaredDeviation[c][f] / memberCounts[c]);
            }
            result[c].setStdDev(spread);
        }

        return result;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}

From source file:LogReg.Logistic.java

License:Open Source License

/**
 * Builds the classifier.
 *
 * <p>The training data is copied and preprocessed (instances with a missing
 * class removed, missing values replaced, useless attributes removed, nominal
 * attributes binarised), then extracted into a standardised data matrix with
 * a leading constant column. The parameters are fitted by minimising an
 * objective with {@code OptEng} and finally converted back to the original
 * attribute units and stored in {@code m_Par}.
 *
 * @param train the training data to be used for generating the classifier
 * @throws Exception if the classifier could not be built successfully; in
 *         particular when the instance weights sum to at most 1 while there
 *         is more than one instance
 */
public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class (on a copy, the caller's data is not modified here)
    train = new Instances(train);
    train.deleteWithMissingClass();

    // Replace missing values; the filter is kept in a field after being configured on this data
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);

    // Transform attributes (nominal to binary)
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // Save the structure for printing the model
    m_structure = new Instances(train, 0);

    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nK = m_NumClasses - 1; // Only K-1 class labels needed
    int nR = m_NumPredictors = train.numAttributes() - 1; // Predictors: every attribute except the class
    int nC = train.numInstances();

    m_Data = new double[nC][nR + 1]; // Data values; column 0 is the constant (intercept) column
    int[] Y = new int[nC]; // Class labels
    double[] xMean = new double[nR + 1]; // Attribute means
    xSD = new double[nR + 1]; // Attribute stddev's
    double[] sY = new double[nK + 1]; // Per-class instance counts (unweighted)
    double[] weights = new double[nC]; // Weights of instances
    double totWeights = 0; // Total weights of the instances
    m_Par = new double[nR + 1][nK]; // Optimized parameter values

    if (m_Debug) {
        System.out.println("Extracting data...");
    }

    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        Y[i] = (int) current.classValue(); // Class value starts from 0
        weights[i] = current.weight(); // Dealing with weights
        totWeights += weights[i];

        m_Data[i][0] = 1;
        // j walks the data-matrix columns 1..nR while k walks all attributes, skipping the class
        int j = 1;
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                // Accumulate weighted sum and weighted sum of squares; turned into mean/stddev below
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }

        // Class count
        sY[Y[i]]++;
    }

    // NOTE(review): the check is "<= 1" although the message says "less than 1"
    if ((totWeights <= 1) && (nC > 1))
        throw new Exception("Sum of weights of instances less than 1, please reweight!");

    // Column 0 gets mean 0 and stddev 1 so the normalisation below leaves the constant column unchanged
    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        if (totWeights > 1)
            // Weighted standard deviation with (totWeights - 1) in the denominator
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        else
            xSD[j] = 0;
    }

    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++)
            System.out.println(sY[m] + " cases have class " + m);
        System.out.println("\n Variable     Avg       SD    ");
        for (int j = 1; j <= nR; j++)
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4)
                    + Utils.doubleToString(xSD[j], 10, 4));
    }

    // Normalise input data; columns with zero stddev are left as they are
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }

    if (m_Debug) {
        System.out.println("\nIteration History...");
    }

    // Parameter vector: one run of (nR + 1) values per class, for the first nK classes
    double x[] = new double[(nR + 1) * nK];
    double[][] b = new double[2][x.length]; // Boundary constraints, N/A here

    // Initialize
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        // Intercept starts from the log ratio of class counts, with 1 added to each count
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }

    OptEng opt = new OptEng();
    opt.setDebug(m_Debug);
    opt.setWeights(weights);
    opt.setClassLabels(Y);

    if (m_MaxIts == -1) { // Search until convergence
        x = opt.findArgmin(x, b);
        // A null result is treated as "not finished yet": restart from the optimiser's current values
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println(" -------------<Converged>--------------");
    } else {
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) // Not enough, but use the current value
            x = opt.getVarbValues();
    }

    m_LL = -opt.getMinFunction(); // Log-likelihood

    // Don't need data matrix anymore
    m_Data = null;

    // Convert coefficients back to non-normalized attribute units
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            if (xSD[j] != 0) {
                // Undo the scaling on the slope and move the mean shift into the intercept
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}