Example usage for weka.core Instances numDistinctValues

List of usage examples for weka.core Instances numDistinctValues

Introduction

In this page you can find the example usage for weka.core Instances numDistinctValues.

Prototype

publicint numDistinctValues(Attribute att) 

Source Link

Document

Returns the number of distinct values of a given attribute.

Usage

From source file:Helper.CustomFilter.java

public String convertToFit(String value, Instances data, int index) {
    int i;/*from   w  w w .j a v  a 2  s  .  c  om*/
    String threshold = data.attribute(index).value(0);
    for (i = 0; i < data.numDistinctValues(data.attribute(index)); i++) {
        if (Float.valueOf(value) < Float.valueOf(data.attribute(index).value(i))) {
            value = threshold;
            return value;
        }
        threshold = data.attribute(index).value(i);
    }
    value = threshold;
    return value;
}

From source file:lattice.Lattice.java

License:Open Source License

/**
 * Constructor of a lattice over the given variables of the dataset.
 * //from  w ww  .j a v a  2 s  . com
 * @param dataset
 */
public Lattice(Instances dataset) {

    // ~ initialise internal structure for counting (TID sets)
    this.nbInstances = dataset.numInstances();
    this.nbVariables = dataset.numAttributes();

    BitSet[][] presence = new BitSet[nbVariables][];

    TreeSet<Integer> allAttributesNumbers = new TreeSet<Integer>();
    int[] nbValuesForAttribute = new int[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        nbValuesForAttribute[a] = dataset.numDistinctValues(a) + 1; //+1 for missing
        presence[a] = new BitSet[nbValuesForAttribute[a]];
        allAttributesNumbers.add(a);
        for (int v = 0; v < presence[a].length; v++) {
            presence[a][v] = new BitSet();
        }
    }

    for (int i = 0; i < nbInstances; i++) {
        Instance row = dataset.instance(i);
        for (int a = 0; a < nbVariables; a++) {

            int indexOfValue;
            if (row.isMissing(a)) {
                //               indexOfValue = (int) dataset.meanOrMode(a);
                indexOfValue = dataset.numDistinctValues(a); //missing at the end
            } else {
                String value = row.stringValue(a);
                indexOfValue = row.attribute(a).indexOfValue(value);
            }
            presence[a][indexOfValue].set(i);

        }
    }

    // initialise the first nodes of the lattice (i.e., the ones
    // corresponding to single variables
    this.all = new LatticeNode(this, nbValuesForAttribute);
    this.singleNodes = new LatticeNode[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        int[] variablesNumbers = { a };
        LatticeNode node = new LatticeNode(this, variablesNumbers, nbValuesForAttribute, presence[a], all);
        singleNodes[a] = node;
    }

}

From source file:lattice.Lattice.java

License:Open Source License

public Lattice(Instances structure, ArffReader loader) throws IOException {
    // ~ initialise internal structure for counting (TID sets)
    this.nbInstances = 0;
    this.nbVariables = structure.numAttributes();

    BitSet[][] presence = new BitSet[nbVariables][];

    TreeSet<Integer> allAttributesNumbers = new TreeSet<Integer>();
    int[] nbValuesForAttribute = new int[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        nbValuesForAttribute[a] = structure.numDistinctValues(a) + 1;//+1 for missing
        presence[a] = new BitSet[nbValuesForAttribute[a]];
        allAttributesNumbers.add(a);// w  ww .j a va2s  .c  om
        for (int v = 0; v < presence[a].length; v++) {
            presence[a][v] = new BitSet();
        }
    }

    Instance row;
    while ((row = loader.readInstance(structure)) != null) {
        for (int a = 0; a < nbVariables; a++) {
            int indexOfValue;
            if (row.isMissing(a)) {
                indexOfValue = structure.numDistinctValues(a);//missing at the end
            } else {
                String value = row.stringValue(a);
                indexOfValue = row.attribute(a).indexOfValue(value);
            }
            presence[a][indexOfValue].set(this.nbInstances);

        }
        this.nbInstances++;
    }

    // initialise the first nodes of the lattice (i.e., the ones
    // corresponding to single variables
    this.all = new LatticeNode(this, nbValuesForAttribute);
    this.singleNodes = new LatticeNode[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        int[] variablesNumbers = { a };
        LatticeNode node = new LatticeNode(this, variablesNumbers, nbValuesForAttribute, presence[a], all);
        singleNodes[a] = node;
    }
}

From source file:machinelearningq2.BasicNaiveBayesV1.java

/**
 *
 * Performs lapalce correction to ensure there are no zero values in the
 * data Creating a DataFound object ensures the count starts from 1
 *
 * @param instnc/*www.  j  a  v  a  2s.c  o  m*/
 * @return
 * @throws Exception
 */
public void laplaceCorrection(Instances inst) throws ParseException {
    inst.setClassIndex(inst.numAttributes() - 1);
    for (int c = 0; c < inst.numClasses(); c++) {
        for (int j = 0; j < inst.numAttributes() - 1; j++) {
            for (int i = 0; i < inst.numDistinctValues(j); i++) {
                String attributeValue = inst.attribute(j).value(i);
                NumberFormat nf = NumberFormat.getInstance();
                double atval = nf.parse(attributeValue).doubleValue();
                DataFound d = new DataFound(atval, c, i);
                data.add(d);
            }
        }
    }
}

From source file:org.openml.webapplication.fantail.dc.DCUntils.java

License:Open Source License

public static double[] computeAttributeEntropy(Instances data) {
    List<Double> attributeEntropy = new ArrayList<Double>();
    for (int attIndex = 0; attIndex < data.numAttributes(); attIndex++) {

        if (data.attribute(attIndex).isNominal() && (data.classIndex() != attIndex)) {
            double[] attValueCounts = new double[data.numDistinctValues(attIndex)];

            for (int i = 0; i < data.numInstances(); i++) {
                Instance inst = data.instance(i);
                attValueCounts[(int) inst.value(attIndex)]++;
            }//from   www.j  ava 2 s .c  o m
            double attEntropy = 0;
            for (int c = 0; c < data.attribute(attIndex).numValues(); c++) {
                if (attValueCounts[c] > 0) {
                    double prob_c = attValueCounts[c] / data.numInstances();
                    attEntropy += prob_c * (Utils.log2(prob_c));
                }
            }
            attEntropy = attEntropy * -1.0;
            attributeEntropy.add(attEntropy);
        }
    }
    return ArrayUtils.toPrimitive(attributeEntropy.toArray(new Double[attributeEntropy.size()]));
}

From source file:org.openml.webapplication.fantail.dc.statistical.NominalAttDistinctValues.java

License:Open Source License

@Override
public Map<String, Double> characterize(Instances data) {
    int attrib_count = data.numAttributes() - 1;
    int nominal_count = 0;

    for (int i = 0; i < attrib_count; i++) {
        if (data.attribute(i).isNominal()) {
            nominal_count++;//w ww  . java 2  s .  c  o  m
        }
    }

    if (nominal_count == 0) {
        Map<String, Double> qualities = new HashMap<String, Double>();
        qualities.put(ids[0], -1.0);
        qualities.put(ids[1], -1.0);
        qualities.put(ids[2], -1.0);
        qualities.put(ids[3], -1.0);
        return qualities;
    }

    ArrayList<Double> distinctValuesCounts = new ArrayList<Double>();

    for (int i = 0; i < attrib_count; i++) {
        if (data.attribute(i).isNominal()) {
            distinctValuesCounts.add(1.0 * data.numDistinctValues(i));

        }
    }

    double[] values = new double[distinctValuesCounts.size()];
    for (int i = 0; i < distinctValuesCounts.size(); i++) {
        values[i] = distinctValuesCounts.get(i);
    }

    double min = values[Utils.minIndex(values)];
    double max = values[Utils.maxIndex(values)];
    double mean = Utils.mean(values);
    double variance = Utils.variance(values);
    double stdv = Math.sqrt(variance);

    Map<String, Double> qualities = new HashMap<String, Double>();
    qualities.put(ids[0], max);
    qualities.put(ids[1], min);
    qualities.put(ids[2], mean);
    qualities.put(ids[3], stdv);
    return qualities;
}