Example usage for weka.core Instance attribute

List of usage examples for weka.core Instance attribute

Introduction

On this page you can find example usages of the attribute method of weka.core Instance.

Prototype

public Attribute attribute(int index);

Source Link

Document

Returns the attribute with the given index.

Usage

From source file:knn.KNNClassifier.java

/**
 * Computes a squared-Euclidean-style dissimilarity between two instances.
 *
 * <p>For every compared index, numeric attributes contribute the squared difference
 * of their values, and any other attribute contributes a fixed penalty when the
 * string values of the two instances differ. No square root is taken, so the result
 * is a sum of squares.
 *
 * <p>The last attribute of each instance is not compared (the loop stops at
 * {@code numAttributes() - 1}); presumably it is the class attribute - confirm
 * against the caller.
 *
 * @param instanceLHS the first instance
 * @param instanceRHS the second instance
 * @return the accumulated dissimilarity; 0 when all compared attributes agree
 */
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {

    // penalty added for each non-numeric attribute whose values differ
    final double mismatchPenalty = 5;

    double distance = 0;

    // only indices valid on both sides are compared, and the last attribute is skipped
    final int comparedAttributes = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;

    for (int i = 0; i < comparedAttributes; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {

            distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2);
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {

            // equal values add nothing; previously the penalty was added unconditionally
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:knn.KNNClassifier.java

/**
 * Computes a Manhattan-style dissimilarity between two instances.
 *
 * <p>For every compared index, numeric attributes contribute the absolute difference
 * of their values, and any other attribute contributes a fixed penalty when the
 * string values of the two instances differ.
 *
 * <p>The last attribute of each instance is not compared (the loop stops at
 * {@code numAttributes() - 1}); presumably it is the class attribute - confirm
 * against the caller.
 *
 * @param instanceLHS the first instance
 * @param instanceRHS the second instance
 * @return the accumulated dissimilarity; 0 when all compared attributes agree
 */
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) {

    // penalty added for each non-numeric attribute whose values differ
    final double mismatchPenalty = 5;

    double distance = 0;

    // only indices valid on both sides are compared, and the last attribute is skipped
    final int comparedAttributes = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;

    for (int i = 0; i < comparedAttributes; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {

            distance += abs(instanceLHS.value(i) - instanceRHS.value(i));
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {

            // equal values add nothing; previously a match wiped the distance
            // accumulated so far and the penalty was then added anyway
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:knnclassifier.KNNClassifier.java

/**
 * Computes a squared-Euclidean-style dissimilarity between two instances.
 *
 * <p>For every compared index, numeric attributes contribute the squared difference
 * of their values, and any other attribute contributes a fixed penalty when the
 * string values of the two instances differ. No square root is taken, so the result
 * is a sum of squares.
 *
 * <p>The last attribute of each instance is not compared (the loop stops at
 * {@code numAttributes() - 1}); presumably it is the class attribute - confirm
 * against the caller.
 *
 * @param instanceLHS the first instance
 * @param instanceRHS the second instance
 * @return the accumulated dissimilarity; 0 when all compared attributes agree
 */
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {

    // penalty added for each non-numeric attribute whose values differ
    final double mismatchPenalty = 5;

    double distance = 0;

    // only indices valid on both sides are compared, and the last attribute is skipped
    final int comparedAttributes = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;

    for (int i = 0; i < comparedAttributes; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {

            distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2);
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {

            // equal values add nothing; previously the penalty was added unconditionally
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:knnclassifier.KNNClassifier.java

/**
 * Computes a Manhattan-style dissimilarity between two instances.
 *
 * <p>For every compared index, numeric attributes contribute the absolute difference
 * of their values, and any other attribute contributes a fixed penalty when the
 * string values of the two instances differ.
 *
 * <p>The last attribute of each instance is not compared (the loop stops at
 * {@code numAttributes() - 1}); presumably it is the class attribute - confirm
 * against the caller.
 *
 * @param instanceLHS the first instance
 * @param instanceRHS the second instance
 * @return the accumulated dissimilarity; 0 when all compared attributes agree
 */
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) {

    // penalty added for each non-numeric attribute whose values differ
    final double mismatchPenalty = 5;

    double distance = 0;

    // only indices valid on both sides are compared, and the last attribute is skipped
    final int comparedAttributes = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;

    for (int i = 0; i < comparedAttributes; i++) {

        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {

            distance += abs(instanceLHS.value(i) - instanceRHS.value(i));
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {

            // equal values add nothing; previously a match wiped the distance
            // accumulated so far and the penalty was then added anyway
            distance += mismatchPenalty;
        }
    }

    return distance;
}

From source file:lattice.Lattice.java

License:Open Source License

/**
 * Constructor of a lattice over the given variables of the dataset.
 *
 * <p>For every attribute, one {@link BitSet} per value index is built that records
 * which rows take that value; an extra, last slot collects the rows where the
 * attribute is missing. These sets are handed to one single-variable
 * {@code LatticeNode} per attribute.
 *
 * @param dataset the instances to index; each attribute becomes one lattice variable
 */
public Lattice(Instances dataset) {

    // ~ initialise internal structure for counting (TID sets)
    this.nbInstances = dataset.numInstances();
    this.nbVariables = dataset.numAttributes();

    // presence[a][v] holds the indices of the rows whose attribute a has value index v
    BitSet[][] presence = new BitSet[nbVariables][];

    int[] nbValuesForAttribute = new int[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        nbValuesForAttribute[a] = dataset.numDistinctValues(a) + 1; //+1 for missing
        presence[a] = new BitSet[nbValuesForAttribute[a]];
        for (int v = 0; v < presence[a].length; v++) {
            presence[a][v] = new BitSet();
        }
    }

    for (int i = 0; i < nbInstances; i++) {
        Instance row = dataset.instance(i);
        for (int a = 0; a < nbVariables; a++) {

            int indexOfValue;
            if (row.isMissing(a)) {
                // missing at the end: the last slot equals dataset.numDistinctValues(a),
                // reused here instead of being recomputed for every missing cell
                indexOfValue = nbValuesForAttribute[a] - 1;
            } else {
                // NOTE(review): assumes indexOfValue(...) yields an index below
                // numDistinctValues(a) for every attribute - confirm for the Weka
                // version in use and for non-nominal attributes
                String value = row.stringValue(a);
                indexOfValue = row.attribute(a).indexOfValue(value);
            }
            presence[a][indexOfValue].set(i);

        }
    }

    // initialise the first nodes of the lattice (i.e., the ones
    // corresponding to single variables)
    this.all = new LatticeNode(this, nbValuesForAttribute);
    this.singleNodes = new LatticeNode[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        int[] variablesNumbers = { a };
        LatticeNode node = new LatticeNode(this, variablesNumbers, nbValuesForAttribute, presence[a], all);
        singleNodes[a] = node;
    }

}

From source file:lattice.Lattice.java

License:Open Source License

/**
 * Constructor of a lattice that reads its rows incrementally from a loader.
 *
 * <p>Rows are pulled one at a time with {@code loader.readInstance(structure)} until
 * it returns {@code null}; {@code nbInstances} is counted while reading. For every
 * attribute, one {@link BitSet} per value index records which rows take that value,
 * with an extra, last slot for rows where the attribute is missing.
 *
 * @param structure the dataset header describing the attributes
 * @param loader the reader supplying the rows
 * @throws IOException if reading a row fails
 */
public Lattice(Instances structure, ArffReader loader) throws IOException {
    // ~ initialise internal structure for counting (TID sets)
    this.nbInstances = 0;
    this.nbVariables = structure.numAttributes();

    // presence[a][v] holds the indices of the rows whose attribute a has value index v
    BitSet[][] presence = new BitSet[nbVariables][];

    int[] nbValuesForAttribute = new int[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        // NOTE(review): structure is queried before any row is read; this relies on
        // numDistinctValues(a) reporting the declared number of values - confirm for
        // the Weka version in use
        nbValuesForAttribute[a] = structure.numDistinctValues(a) + 1;//+1 for missing
        presence[a] = new BitSet[nbValuesForAttribute[a]];
        for (int v = 0; v < presence[a].length; v++) {
            presence[a][v] = new BitSet();
        }
    }

    Instance row;
    while ((row = loader.readInstance(structure)) != null) {
        for (int a = 0; a < nbVariables; a++) {
            int indexOfValue;
            if (row.isMissing(a)) {
                // missing at the end: always the last slot allocated above, rather
                // than re-querying the structure for every missing cell
                indexOfValue = nbValuesForAttribute[a] - 1;
            } else {
                String value = row.stringValue(a);
                indexOfValue = row.attribute(a).indexOfValue(value);
            }
            // the running row count doubles as the index of the current row
            presence[a][indexOfValue].set(this.nbInstances);

        }
        this.nbInstances++;
    }

    // initialise the first nodes of the lattice (i.e., the ones
    // corresponding to single variables)
    this.all = new LatticeNode(this, nbValuesForAttribute);
    this.singleNodes = new LatticeNode[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        int[] variablesNumbers = { a };
        LatticeNode node = new LatticeNode(this, variablesNumbers, nbValuesForAttribute, presence[a], all);
        singleNodes[a] = node;
    }
}

From source file:lu.lippmann.cdb.common.gui.dataset.InstanceFormatter.java

License:Open Source License

/**
 * Renders an instance as HTML, one {@code name = <b>value</b><br/>} entry per attribute.
 *
 * <p>Attribute names and nominal/string values are HTML-escaped. Date attributes are
 * printed with {@code FormatterUtil.DATE_FORMAT}, and numeric attributes with their
 * raw double value. Attributes of any other type produce an empty value.
 *
 * @param inst the instance to render
 * @param withHTMLHeader whether to wrap the output in {@code <html><body>} tags
 * @return the HTML text
 */
public static String htmlFormat(final Instance inst, final boolean withHTMLHeader) {
    final StringBuilder html = new StringBuilder();
    if (withHTMLHeader) {
        html.append("<html><body>");
    }

    final int attributeCount = inst.numAttributes();
    for (int idx = 0; idx < attributeCount; idx++) {
        final String escapedName = StringEscapeUtils.escapeHtml(inst.attribute(idx).name());
        html.append(escapedName).append(" = ").append("<b>");

        final boolean textual = inst.attribute(idx).isNominal() || inst.attribute(idx).isString();
        if (textual) {
            html.append(StringEscapeUtils.escapeHtml(inst.stringValue(idx)));
        } else if (inst.attribute(idx).isDate()) {
            // the stored value is interpreted as milliseconds
            final Calendar calendar = Calendar.getInstance();
            calendar.setTimeInMillis((long) inst.value(idx));
            html.append(FormatterUtil.DATE_FORMAT.format(calendar.getTime()));
        } else if (inst.attribute(idx).isNumeric()) {
            html.append(inst.value(idx));
        }

        html.append("</b>").append("<br/>");
    }

    if (withHTMLHeader) {
        html.append("</body></html>");
    }
    return html.toString();
}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * /*www  . j a v  a2s . c o m*/
 * @return
 * @throws Exception 
 */
private void computeMatrixMDF() throws Exception {
    //Build filtered instance for each element of the base
    int baseSize = base.size();
    Attribute baseAttribute = instances.attribute(baseIndex);

    //Create baseSize copy of instances for filtering
    List<Instances> filteredInstances = new ArrayList<Instances>();
    for (int i = 0; i < baseSize; i++) {
        filteredInstances.add(new Instances(instances));
    }

    //Filter
    for (int i = 0; i < instances.numInstances(); i++) {
        final Instance instance = instances.instance(i);
        for (final TupleSI j : base) {
            final int wekaAttributeValue = (int) instance.value(baseIndex);
            if (!new TupleSI(baseAttribute.value(wekaAttributeValue), baseIndex).equals(j)) {
                WekaUtil2.removeFromInstances(filteredInstances.get(base.indexOf(j)), instance);
            }
        }
    }

    //Compute I vector
    final int idxsNs = idxsN.size();

    if (idxsNs == 0) {
        throw new Exception("You need at least one numerical attribute !!");
    }

    int minIndexForI = -1;
    double minValueForI = Double.MAX_VALUE;
    final double[][] meanBase = new double[idxsNs][baseSize];
    int p = 0;
    for (final Integer num : idxsN) {
        double Ip = 0.0;
        for (int j = 0; j < baseSize; j++) {
            final List<Instance> filtredInstance = filteredInstances.get(j);
            final int fs = filtredInstance.size();
            double mean = 0;
            for (int l = 0; l < fs; l++) {
                mean += filtredInstance.get(l).value(num);
            }
            mean = mean / fs;
            meanBase[p][j] = mean;
            for (int l = 0; l < fs; l++) {
                Ip += Math.pow(filtredInstance.get(l).value(num) - mean, 2);
            }
        }
        if (Ip < minValueForI) {
            minValueForI = Ip;
            minIndexForI = p;
        }
        p++;
    }

    this.maxNum = new double[idxsNs];
    for (int i = 0; i < instances.numInstances(); i++) {
        final Instance instance = instances.instance(i);
        //Save maximum value for each numerical attribute
        for (Integer n1 : idxsN) {
            double val = instance.value(n1);
            int idx = idxsN.indexOf(n1);
            if (val > maxNum[idx]) {
                maxNum[idx] = val;
            }
        }
        //Compute matrix M for each categorical attribute
        for (final Integer e1 : idxsC) {
            for (final Integer e2 : idxsC) {
                final int i1 = getIndexOf(e1, instance.attribute(e1).value((int) instance.value(e1)));
                final int j1 = getIndexOf(e2, instance.attribute(e2).value((int) instance.value(e2)));
                M[i1][j1] = M[i1][j1] + 1;
            }
        }
    }

    //Compute D matrix
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            double d = M[i][j] / (M[i][i] + M[j][j] - M[i][j] + 0.0);
            if (d >= theta) {
                D[i][j] = d;
            } else {
                D[i][j] = 0;
            }
        }
    }

    //Compute F matrix for base
    for (final TupleSI baseVal : base) {
        F.put(baseVal, meanBase[minIndexForI][base.indexOf(baseVal)]);
    }

    //Compute F matrix for noBase
    for (final TupleSI noBaseVal : noBase) {
        double f = 0.0;
        for (final TupleSI baseVal : base) {
            f += D[getIndexOf(noBaseVal)][getIndexOf(baseVal)] * F.get(baseVal);
        }
        F.put(noBaseVal, f);
    }

}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * /*from  w w  w.  jav a 2s  .com*/
 * @return
 */
public Instances getModifiedInstances() {

    //Copy attribute list (and change categorical by numerical)
    final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>();
    for (int i = 0; i < instances.numAttributes(); i++) {
        Attribute attr = instances.attribute(i);
        if (attr.isNumeric() || attr.index() == instances.classIndex()) {
            lAttrs.add(attr);
        } else {
            Attribute newAttr = new Attribute(attr.name());
            lAttrs.add(newAttr);
        }
    }

    //Build new instance
    final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances());
    newInstances.setClassIndex(instances.classIndex());
    for (int i = 0; i < instances.numInstances(); i++) {
        final Instance instance = instances.instance(i);
        final Instance cpyInstance = (Instance) instance.copy();
        for (int j = 0; j < instance.numAttributes(); j++) {
            Attribute attribute = instance.attribute(j);
            int k = 0;
            if (attribute.index() == instances.classIndex()) {
                //The class index is nominal
                cpyInstance.setValue(attribute, instance.stringValue(j));
            } else if (!attribute.isNumeric()) {
                String elt = attribute.value((int) instance.value(j));
                cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j)));
            } else {
                if (maxNum[k] > 1) {
                    cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]);
                }
                k++;
            }
        }
        newInstances.add(cpyInstance);
    }

    if (ignoreClass && instances.classIndex() != -1) {
        newInstances.deleteAttributeAt(instances.classIndex());
    }
    return newInstances;
}

From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java

License:Open Source License

/**
 * Generates the centroid coordinates of a cluster from its members (the objects
 * assigned to the cluster) and the distance function being used.
 *
 * <p>With a Euclidean distance each coordinate is {@code meanOrMode} of the numeric
 * view, and the occurrences of every categorical value found in the original
 * instances are counted. With a Manhattan distance each coordinate is the median of
 * the numeric view. A cluster with a single member yields that member's values, with
 * each of its categorical values counted once.
 *
 * @param preserveOrder for the Manhattan case, whether to work on
 *        {@code numericInstances} directly instead of on a copy
 * @param distanceFunction the distance in use; it selects how coordinates are computed
 * @param numericInstances the cluster members in numeric form
 * @param originalInstances the cluster members with their original attributes
 * @param clusterIndex the index of the cluster, passed on to the centroid
 * @return the centroid
 * @throws IllegalStateException if the distance is neither Euclidean nor Manhattan
 *         and there is at least one attribute to process
 */
public static MixedCentroid computeMixedCentroid(final boolean preserveOrder,
        final NormalizableDistance distanceFunction, final Instances numericInstances,
        final Instances originalInstances, final int clusterIndex) {
    final int memberCount = numericInstances.numInstances();
    final int attributeCount = numericInstances.numAttributes();

    // occurrences of each (categorical value, attribute index) pair among the members
    final Map<TupleSI, Integer> categoryCounts = new HashMap<TupleSI, Integer>();

    // a lone member is its own centroid
    if (memberCount == 1) {
        final Instance soleNumeric = numericInstances.firstInstance();
        final Instance soleMixed = originalInstances.firstInstance();
        final double[] soleCoordinates = soleNumeric.toDoubleArray();
        for (int a = 0; a < soleMixed.numAttributes(); a++) {
            if (soleMixed.attribute(a).isNumeric()) {
                continue;
            }
            final String label = soleMixed.attribute(a).value((int) soleMixed.value(a));
            categoryCounts.put(new TupleSI(label, a), 1);
        }
        return new MixedCentroid(clusterIndex, soleCoordinates, categoryCounts);
    }

    final boolean manhattan = distanceFunction instanceof ManhattanDistance;
    final boolean euclidean = distanceFunction instanceof EuclideanDistance;

    // the following three are used only for the Manhattan distance
    Instances ordered = null;
    int medianIndex = 0;
    boolean evenCount = false;
    if (manhattan) {
        medianIndex = (memberCount - 1) / 2;
        evenCount = (memberCount % 2) == 0;
        ordered = preserveOrder ? numericInstances : new Instances(numericInstances);
    }

    final double[] centre = new double[attributeCount];
    for (int a = 0; a < attributeCount; a++) {
        if (euclidean) {
            // the centroid is the mean point (meanOrMode of the numeric view)
            centre[a] = numericInstances.meanOrMode(a);

            for (int row = 0; row < memberCount; row++) {
                if (originalInstances.attribute(a).isNumeric()) {
                    continue;
                }
                final Instance member = originalInstances.instance(row);
                final String label = member.attribute(a).value((int) member.value(a));
                final TupleSI key = new TupleSI(label, a);
                final Integer seen = categoryCounts.get(key);
                categoryCounts.put(key, seen == null ? 1 : seen + 1);
            }
        } else if (manhattan) {
            // the centroid is the median point
            // NOTE(review): the result of kthSmallestValue is discarded; presumably it
            // is called for a reordering side effect on the instances - confirm
            ordered.kthSmallestValue(a, medianIndex + 1);
            double median = ordered.instance(medianIndex).value(a);
            if (evenCount) {
                // even number of members: average the two middle values
                ordered.kthSmallestValue(a, medianIndex + 2);
                median = (median + ordered.instance(medianIndex + 1).value(a)) / 2;
            }
            centre[a] = median;
        } else {
            throw new IllegalStateException("Not handled distance ...");
        }
    }

    return new MixedCentroid(clusterIndex, centre, categoryCounts);
}