Example usage for weka.core Instance numAttributes

List of usage examples for weka.core Instance numAttributes

Introduction

On this page you can find example usages of weka.core.Instance.numAttributes().

Prototype

public int numAttributes();

Source Link

Document

Returns the number of attributes.

Usage

From source file:kea.NumbersFilter.java

License:Open Source License

/** 
 * Converts an instance. A phrase boundary is inserted where
 * a number is found./*from   www.  jav a 2 s.c  o m*/
 */
private void convertInstance(Instance instance) throws Exception {

    double[] instVals = new double[instance.numAttributes()];

    for (int i = 0; i < instance.numAttributes(); i++) {
        if ((!instance.attribute(i).isString()) || instance.isMissing(i)) {
            instVals[i] = instance.value(i);
        } else {
            String str = instance.stringValue(i);
            StringBuffer resultStr = new StringBuffer();
            StringTokenizer tok = new StringTokenizer(str, " \t\n", true);
            while (tok.hasMoreTokens()) {
                String token = tok.nextToken();

                // Everything that doesn't contain at least
                // one letter is considered to be a number
                boolean isNumber = true;
                for (int j = 0; j < token.length(); j++) {
                    if (Character.isLetter(token.charAt(j))) {
                        isNumber = false;
                        break;
                    }
                }
                if (!isNumber) {
                    resultStr.append(token);
                } else {
                    if (token.equals(" ") || token.equals("\t") || token.equals("\n")) {
                        resultStr.append(token);
                    } else {
                        resultStr.append(" \n ");
                    }
                }
            }
            int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString());
            instVals[i] = (double) index;
        }
    }
    Instance inst = new Instance(instance.weight(), instVals);
    inst.setDataset(getOutputFormat());
    push(inst);
}

From source file:knn.KNNClassifier.java

/**
 * Computes a Euclidean-style distance between two instances.
 *
 * <p>For each compared attribute index: if both attributes are numeric the
 * squared difference of the values is added; otherwise a fixed penalty of 5
 * is added when the string values differ and nothing is added when they are
 * equal. No square root is taken, so the numeric part is a squared distance.
 *
 * <p>The last attribute of each instance is not compared (presumably it is
 * the class attribute; confirm against the caller).
 *
 * @param instanceLHS the first instance
 * @param instanceRHS the second instance
 * @return the accumulated distance
 */
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {

    // penalty for a pair of differing non-numeric values
    final double mismatchPenalty = 5;

    double distance = 0;

    // compare indices 0 .. (smaller attribute count - 2)
    final int limit = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;

    for (int i = 0; i < limit; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {

            distance += Math.pow(instanceLHS.value(i) - instanceRHS.value(i), 2);
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {

            // only differing values contribute; equal values add nothing
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:knn.KNNClassifier.java

/**
 * Computes a Manhattan-style distance between two instances.
 *
 * <p>For each compared attribute index: if both attributes are numeric the
 * absolute difference of the values is added; otherwise a fixed penalty of 5
 * is added when the string values differ and nothing is added when they are
 * equal. Contributions from earlier attributes are never discarded.
 *
 * <p>The last attribute of each instance is not compared (presumably it is
 * the class attribute; confirm against the caller).
 *
 * @param instanceLHS the first instance
 * @param instanceRHS the second instance
 * @return the accumulated distance
 */
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) {

    // penalty for a pair of differing non-numeric values
    final double mismatchPenalty = 5;

    double distance = 0;

    // compare indices 0 .. (smaller attribute count - 2)
    final int limit = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;

    for (int i = 0; i < limit; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {

            distance += Math.abs(instanceLHS.value(i) - instanceRHS.value(i));
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {

            // only differing values contribute; equal values add nothing
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:knnclassifier.KNNClassifier.java

/**
 * Computes a Euclidean-style distance between two instances.
 *
 * <p>Numeric attribute pairs contribute the squared difference of their
 * values. Any other pair contributes a fixed penalty of 5 when the string
 * values differ and nothing when they match. The sum is returned without
 * taking a square root.
 *
 * <p>The last attribute of each instance is skipped (presumably the class
 * attribute; confirm against the caller).
 *
 * @param instanceLHS the first instance
 * @param instanceRHS the second instance
 * @return the accumulated distance
 */
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {

    // penalty for a pair of differing non-numeric values
    final double mismatchPenalty = 5;

    double distance = 0;

    // stop one short of the smaller attribute count
    final int limit = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;

    for (int i = 0; i < limit; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {

            distance += Math.pow(instanceLHS.value(i) - instanceRHS.value(i), 2);
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {

            // matching values must not be penalised
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:knnclassifier.KNNClassifier.java

/**
 * Computes a Manhattan-style distance between two instances.
 *
 * <p>Numeric attribute pairs contribute the absolute difference of their
 * values. Any other pair contributes a fixed penalty of 5 when the string
 * values differ and nothing when they match. The running total is never
 * reset while iterating.
 *
 * <p>The last attribute of each instance is skipped (presumably the class
 * attribute; confirm against the caller).
 *
 * @param instanceLHS the first instance
 * @param instanceRHS the second instance
 * @return the accumulated distance
 */
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) {

    // penalty for a pair of differing non-numeric values
    final double mismatchPenalty = 5;

    double distance = 0;

    // stop one short of the smaller attribute count
    final int limit = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;

    for (int i = 0; i < limit; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {

            distance += Math.abs(instanceLHS.value(i) - instanceRHS.value(i));
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {

            // matching values must not be penalised
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:lineage.AAFClusterer.java

License:Open Source License

/**
 * K-Means Clustering
 *
 * <p>Converts the matrix to a Weka dataset, runs {@code SimpleKMeans} with
 * instance order preserved, and returns one {@code Cluster} per centroid
 * holding the centroid coordinates and the indices of its members. If
 * clustering throws, the stack trace is printed and the JVM exits with
 * status -1.
 *
 * @param data - matrix of observations (numObs x numFeatures)
 * @param numObs - number of observations
 * @param numFeatures - number of features
 * @param k - number of clusters
 * @return the clusters, indexed by centroid position
 */
public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) {
    final Instances dataset = convertMatrixToWeka(data, numObs, numFeatures);

    // uses Euclidean distance by default
    final SimpleKMeans kMeans = new SimpleKMeans();
    try {
        kMeans.setPreserveInstancesOrder(true);
        kMeans.setNumClusters(k);
        kMeans.buildClusterer(dataset);

        // one Cluster per centroid, carrying the centroid's coordinates
        final Instances centroids = kMeans.getClusterCentroids();
        final Cluster[] result = new Cluster[centroids.numInstances()];
        for (int c = 0; c < centroids.numInstances(); c++) {
            final Instance centroid = centroids.instance(c);
            final double[] coordinates = new double[centroid.numAttributes()];
            for (int f = 0; f < coordinates.length; f++) {
                coordinates[f] = centroid.value(f);
            }
            result[c] = new Cluster(coordinates, c);
        }

        // attach each observation index to the cluster it was assigned to
        final int[] clusterOfObservation = kMeans.getAssignments();
        int observation = 0;
        while (observation < clusterOfObservation.length) {
            result[clusterOfObservation[observation]].addMember(observation);
            observation++;
        }
        return result;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }

}

From source file:lu.lippmann.cdb.common.gui.dataset.InstanceFormatter.java

License:Open Source License

/**
 * Renders an instance as HTML, one attribute per line in the form
 * {@code name = <b>value</b><br/>}.
 *
 * <p>Nominal and string values are HTML-escaped, date values are formatted
 * with {@code FormatterUtil.DATE_FORMAT}, and other numeric values are
 * appended as-is. Attributes of any other type get an empty bold element.
 *
 * @param inst the instance to render
 * @param withHTMLHeader whether to wrap the output in {@code <html><body>} tags
 * @return the HTML fragment or document
 */
public static String htmlFormat(final Instance inst, final boolean withHTMLHeader) {
    final StringBuilder html = new StringBuilder();
    if (withHTMLHeader) {
        html.append("<html><body>");
    }

    for (int idx = 0; idx < inst.numAttributes(); idx++) {
        final String escapedName = StringEscapeUtils.escapeHtml(inst.attribute(idx).name());
        html.append(escapedName).append(" = ").append("<b>");

        // the date check must precede the numeric check
        if (inst.attribute(idx).isNominal() || inst.attribute(idx).isString()) {
            final String escapedValue = StringEscapeUtils.escapeHtml(inst.stringValue(idx));
            html.append(escapedValue);
        } else if (inst.attribute(idx).isDate()) {
            final Calendar calendar = Calendar.getInstance();
            final long millis = (long) inst.value(idx);
            calendar.setTimeInMillis(millis);
            html.append(FormatterUtil.DATE_FORMAT.format(calendar.getTime()));
        } else if (inst.attribute(idx).isNumeric()) {
            html.append(inst.value(idx));
        }

        html.append("</b>").append("<br/>");
    }

    if (withHTMLHeader) {
        html.append("</body></html>");
    }
    return html.toString();
}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * /*from   w ww  .  j  av  a 2  s  . co m*/
 * @return
 */
public Instances getModifiedInstances() {

    //Copy attribute list (and change categorical by numerical)
    final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>();
    for (int i = 0; i < instances.numAttributes(); i++) {
        Attribute attr = instances.attribute(i);
        if (attr.isNumeric() || attr.index() == instances.classIndex()) {
            lAttrs.add(attr);
        } else {
            Attribute newAttr = new Attribute(attr.name());
            lAttrs.add(newAttr);
        }
    }

    //Build new instance
    final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances());
    newInstances.setClassIndex(instances.classIndex());
    for (int i = 0; i < instances.numInstances(); i++) {
        final Instance instance = instances.instance(i);
        final Instance cpyInstance = (Instance) instance.copy();
        for (int j = 0; j < instance.numAttributes(); j++) {
            Attribute attribute = instance.attribute(j);
            int k = 0;
            if (attribute.index() == instances.classIndex()) {
                //The class index is nominal
                cpyInstance.setValue(attribute, instance.stringValue(j));
            } else if (!attribute.isNumeric()) {
                String elt = attribute.value((int) instance.value(j));
                cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j)));
            } else {
                if (maxNum[k] > 1) {
                    cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]);
                }
                k++;
            }
        }
        newInstances.add(cpyInstance);
    }

    if (ignoreClass && instances.classIndex() != -1) {
        newInstances.deleteAttributeAt(instances.classIndex());
    }
    return newInstances;
}

From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java

License:Open Source License

/**
 * Generate the centroid coordinates based 
 * on it's  members (objects assigned to the cluster of the centroid) and the distance 
 * function being used./*from ww  w . j a v a  2  s. com*/
 * @return the centroid
 */
public static MixedCentroid computeMixedCentroid(final boolean preserveOrder,
        final NormalizableDistance distanceFunction, final Instances numericInstances,
        final Instances originalInstances, final int clusterIndex) {
    final int numInstances = numericInstances.numInstances();
    final int numAttributes = numericInstances.numAttributes();

    final Map<TupleSI, Integer> addedAttr = new HashMap<TupleSI, Integer>();

    if (numInstances == 1) {
        Instance uniqueNumInstance = numericInstances.firstInstance();
        Instance uniqueMixInstance = originalInstances.firstInstance();
        double[] centroid = uniqueNumInstance.toDoubleArray();
        for (int i = 0; i < uniqueMixInstance.numAttributes(); i++) {
            if (!uniqueMixInstance.attribute(i).isNumeric()) {
                final String catVal = uniqueMixInstance.attribute(i).value((int) uniqueMixInstance.value(i));
                addedAttr.put(new TupleSI(catVal, i), 1);
            }
        }
        return new MixedCentroid(clusterIndex, centroid, addedAttr);
    }

    final double[] vals = new double[numAttributes];

    //used only for Manhattan Distance
    Instances sortedMembers = null;
    int middle = 0;
    boolean dataIsEven = false;

    final boolean isManhattanDist = (distanceFunction instanceof ManhattanDistance);
    final boolean isEuclideanDist = (distanceFunction instanceof EuclideanDistance);

    if (isManhattanDist) {
        middle = (numInstances - 1) / 2;
        dataIsEven = ((numInstances % 2) == 0);
        if (preserveOrder) {
            sortedMembers = numericInstances;
        } else {
            sortedMembers = new Instances(numericInstances);
        }
    }

    for (int j = 0; j < numAttributes; j++) {
        //in case of Euclidian distance the centroid is the mean point
        //in case of Manhattan distance the centroid is the median point
        //in both cases, if the attribute is nominal, the centroid is the mode            
        if (isEuclideanDist) {
            vals[j] = numericInstances.meanOrMode(j);

            for (int i = 0; i < numInstances; i++) {
                if (!originalInstances.attribute(j).isNumeric()) {
                    final Instance instance = originalInstances.instance(i);
                    final String catVal = instance.attribute(j).value((int) instance.value(j));
                    //Initialize map
                    final TupleSI key = new TupleSI(catVal, j);
                    if (!addedAttr.containsKey(key))
                        addedAttr.put(key, 0);
                    addedAttr.put(key, addedAttr.get(key) + 1);
                }
            }
        } else if (isManhattanDist) {
            sortedMembers.kthSmallestValue(j, middle + 1);
            vals[j] = sortedMembers.instance(middle).value(j);
            if (dataIsEven) {
                sortedMembers.kthSmallestValue(j, middle + 2);
                vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2;
            }
        } else {
            throw new IllegalStateException("Not handled distance ...");
        }
    }

    return new MixedCentroid(clusterIndex, vals, addedAttr);
}

From source file:machinelearningcw.EnhancedLinearPerceptron.java

/**
 * Classifies an instance using the weight vector {@code w}.
 *
 * <p>The weighted sum of all attribute values except the last one is
 * computed; the result is 1 when that sum is non-negative and 0 otherwise.
 *
 * @param instnc the instance to classify
 * @return 1 or 0
 * @throws Exception declared by the overridden signature
 */
@Override
public double classifyInstance(Instance instnc) throws Exception {
    // work on a copy so that standardisation does not change the original dataset
    final Instance copy = new DenseInstance(instnc);
    if (setStandardiseAttributes) {
        standardizeAtrrbutes(copy);
    }

    // weighted sum over every attribute except the last one
    double activation = 0;
    final int lastIndex = copy.numAttributes() - 1;
    for (int a = 0; a < lastIndex; a++) {
        activation += w[a] * copy.value(a);
    }

    if (activation >= 0) {
        return 1;
    }
    return 0;
}