Example usage for weka.core Instance value

Introduction

On this page you can find example usages of the value method of weka.core Instance.

Prototype

public double value(Attribute att);

Source Link

Document

Returns an instance's attribute value in internal format.

Usage

From source file:es.jarias.FMC.FMC.java

License:Open Source License

/**
 * Estimates the pairwise mutual information between the attributes of {@code data}
 * selected by {@code indexes}, using empirical value counts and the natural logarithm.
 *
 * <p>Attribute values are cast to {@code int} and used as array positions, so this
 * presumably expects nominal attributes only (TODO confirm against callers).
 *
 * @param data    the instances to count over
 * @param indexes attribute indices (into {@code data}) of the variables to compare
 * @return a square matrix whose entry {@code [i][j]} is the estimate for the attributes
 *         {@code indexes[i]} and {@code indexes[j]}; diagonal entries are left at 0
 */
public static double[][] mutualInfo(Instances data, int[] indexes) {
    final int numVars = indexes.length;

    // Per-variable cardinalities and marginal count tables.
    double[] nValues = new double[numVars];
    double[][] m_counts = new double[numVars][];
    for (int a = 0; a < numVars; a++) {
        nValues[a] = data.attribute(indexes[a]).numValues();
        m_counts[a] = new double[(int) nValues[a]];
    }

    // Joint count tables for every ordered pair of distinct variables.
    double[][][] m_2counts = new double[numVars][numVars][];
    for (int a = 0; a < numVars; a++) {
        for (int b = 0; b < numVars; b++) {
            if (a == b) {
                continue;
            }
            m_2counts[a][b] = new double[(int) (nValues[a] * nValues[b])];
        }
    }

    // Accumulate marginal and joint counts.
    for (Instance row : data) {
        for (int a = 0; a < numVars; a++) {
            double valueA = row.value(indexes[a]);
            m_counts[a][(int) valueA]++;
            for (int b = 0; b < numVars; b++) {
                if (a == b) {
                    continue;
                }
                // Joint cell layout: value of b selects the row, value of a the column.
                int cell = (int) (row.value(indexes[b]) * nValues[a] + valueA);
                m_2counts[a][b][cell]++;
            }
        }
    }

    // MI(X_a; X_b) = (1/N) * sum over cells of n_ab * log(N * n_ab / (n_a * n_b)).
    final int total = data.numInstances();
    double[][] result = new double[numVars][numVars];
    for (int a = 0; a < numVars; a++) {
        for (int b = 0; b < numVars; b++) {
            if (a == b) {
                continue;
            }
            double sum = 0.0;
            for (int va = 0; va < nValues[a]; va++) {
                for (int vb = 0; vb < nValues[b]; vb++) {
                    double joint = m_2counts[a][b][(int) (vb * nValues[a] + va)];
                    double ratio = (1.0 * total * joint) / (1.0 * m_counts[a][va] * m_counts[b][vb]);
                    // Empty cells give a ratio of 0 or NaN and contribute nothing.
                    if (ratio > 0) {
                        sum += joint * Math.log(ratio);
                    }
                }
            }
            result[a][b] = sum / total;
        }
    }

    return result;
}

From source file:es.jarias.FMC.HITON.java

License:Open Source License

/**
 * Builds the HITON state for {@code data}: stores the dataset, caches each attribute's
 * number of values, counts single and pairwise value occurrences, derives the pairwise
 * mutual information matrix {@code I}, and finally runs {@code HITONPC1} and
 * {@code HITONPC2} for every attribute.
 *
 * <p>Attribute values are cast to {@code int} and used as array positions, so this
 * presumably expects nominal attributes only (TODO confirm).
 *
 * @param data the instances to analyse
 */
public HITON(Instances data) {
    dataset = data;
    m_numAttributes = dataset.numAttributes();
    m_numCases = dataset.numInstances();

    // Cardinality and marginal count table per attribute.
    m_numValues = new int[m_numAttributes];
    m_counts = new double[m_numAttributes][];
    for (int a = 0; a < m_numAttributes; a++) {
        m_numValues[a] = dataset.attribute(a).numValues();
        m_counts[a] = new double[m_numValues[a]];
    }

    // Joint count tables, one per ordered pair of distinct attributes.
    m_condiCounts = new double[m_numAttributes][m_numAttributes][];
    for (int a = 0; a < m_numAttributes; a++) {
        for (int b = a + 1; b < m_numAttributes; b++) {
            int cells = m_numValues[a] * m_numValues[b];
            m_condiCounts[a][b] = new double[cells];
            m_condiCounts[b][a] = new double[cells];
        }
    }

    I = new double[m_numAttributes][m_numAttributes];

    // Accumulate counts; each table is laid out row-major by its first attribute.
    for (Instance row : dataset) {
        for (int a = 0; a < m_numAttributes; a++) {
            double valueA = row.value(a);
            m_counts[a][(int) valueA]++;

            for (int b = a + 1; b < m_numAttributes; b++) {
                double valueB = row.value(b);
                m_condiCounts[a][b][(int) (valueA * m_numValues[b] + valueB)]++;
                m_condiCounts[b][a][(int) (valueB * m_numValues[a] + valueA)]++;
            }
        }
    }

    // I(X_i; X_j) = (1/N) * sum over cells of n_ij * log(N * n_ij / (n_i * n_j)).
    for (int i = 0; i < m_numAttributes; i++) {
        for (int j = 0; j < m_numAttributes; j++) {
            if (i != j) {
                double sum = 0.0;
                for (int vi = 0; vi < m_numValues[i]; vi++) {
                    for (int vj = 0; vj < m_numValues[j]; vj++) {
                        double joint = m_condiCounts[i][j][(int) (vi * m_numValues[j] + vj)];
                        double ratio = (1.0 * m_numCases * joint)
                                / (1.0 * m_counts[i][vi] * m_counts[j][vj]);
                        // Empty cells give a ratio of 0 or NaN and contribute nothing.
                        if (ratio > 0) {
                            sum += joint * Math.log(ratio);
                        }
                    }
                }
                I[i][j] = sum / m_numCases;
            }
        }
    }

    // Two separate passes: every PC1 entry is filled before any HITONPC2 call runs.
    PC1 = new ArrayList[m_numAttributes];
    for (int v = 0; v < m_numAttributes; v++) {
        PC1[v] = HITONPC1(v);
    }

    PC = new HashSet[m_numAttributes];
    for (int v = 0; v < m_numAttributes; v++) {
        PC[v] = HITONPC2(v);
    }
}

From source file:es.jarias.FMC.HITON.java

License:Open Source License

/**
 * Estimates the conditional mutual information I(X;Y|Z) from value counts over
 * {@code dataset}, using the natural logarithm.
 *
 * <p>Each joint configuration of the conditioning variables is mapped to one slot of a
 * flat table through mixed-radix strides (the last variable in {@code Z} has stride 1).
 * An empty {@code Z} is accepted: all instances then fall into the single slot 0 and the
 * result is the unconditional estimate of I(X;Y).
 *
 * @param X attribute index of the first variable
 * @param Y attribute index of the second variable
 * @param Z attribute indices of the conditioning variables; may be empty
 * @return the estimated conditional mutual information
 */
public double computeConditionalMI(int X, int Y, Integer[] Z) {

    // Strides for flattening a Z configuration; Zcardinality ends up as the product of
    // the conditioning variables' value counts (1 when Z is empty).
    int Zcardinality = 1;
    int[] offsets = new int[Z.length];
    for (int z = Z.length - 1; z >= 0; z--) {
        offsets[z] = Zcardinality;
        Zcardinality *= m_numValues[Z[z]];
    }

    double[][][] conditional2Counts = new double[m_numValues[X]][m_numValues[Y]][Zcardinality];
    double[][] conditionalXCounts = new double[m_numValues[X]][Zcardinality];
    double[][] conditionalYCounts = new double[m_numValues[Y]][Zcardinality];
    double[] Zcounts = new double[Zcardinality];

    for (Instance inst : dataset) {
        // Flat slot of this instance's Z configuration.
        int index = 0;
        for (int z = 0; z < Z.length; z++) {
            index += inst.value(Z[z]) * offsets[z];
        }

        int xValue = (int) inst.value(X);
        int yValue = (int) inst.value(Y);

        Zcounts[index]++;
        conditionalXCounts[xValue][index]++;
        conditionalYCounts[yValue][index]++;
        conditional2Counts[xValue][yValue][index]++;
    }

    // I(X;Y|Z) = sum_z p(z) * sum_{x,y} p(x,y|z) * log(p(x,y|z) / (p(x|z) * p(y|z)))
    double mi = 0.0;
    for (int v_z = 0; v_z < Zcardinality; v_z++) {
        double zCount = Zcounts[v_z];
        double p_z = zCount / m_numCases;
        double t_mi = 0.0;
        for (int v_x = 0; v_x < m_numValues[X]; v_x++) {
            for (int v_y = 0; v_y < m_numValues[Y]; v_y++) {
                double pXY = conditional2Counts[v_x][v_y][v_z] / zCount;
                double pX = conditionalXCounts[v_x][v_z] / zCount;
                double pY = conditionalYCounts[v_y][v_z] / zCount;
                double ratio = pXY / (pX * pY);
                // Unobserved cells (or unobserved z) give 0 or NaN and are skipped.
                if (ratio > 0) {
                    t_mi += pXY * Math.log(ratio);
                }
            }
        }
        mi += p_z * t_mi;
    }

    return mi;
}

From source file:etc.aloe.filters.AbstractRegexFilter.java

License:Open Source License

/**
 * Produces a sparse copy of {@code instance} extended with regex-derived features.
 *
 * <p>The original attribute values are copied first (string values are re-registered
 * with the output format). Then, for each regex feature, the number of matches in the
 * selected string attribute is appended; when {@code countRegexLengths} is set, the
 * length of the longest match is appended right after each count.
 *
 * @param instance the instance to convert
 * @return a new {@code SparseInstance} carrying the input's weight and the extended values
 * @throws IllegalStateException if the string attribute index has not been set
 */
@Override
protected Instance process(Instance instance) throws Exception {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute not set");
    }

    String stringValue = instance.stringValue(stringAttributeIndex);
    NamedRegex[] regexFeatures = getRegexFeatures();

    int numOldValues = instance.numAttributes();
    // One slot per regex (match count), or two when the longest-match length is also kept.
    int slotsPerRegex = countRegexLengths ? 2 : 1;
    double[] newValues = new double[numOldValues + regexFeatures.length * slotsPerRegex];

    // Copy all attributes from input to output.
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (getInputFormat().attribute(i).type() != Attribute.STRING) {
            // Nominal and numeric values carry over unchanged; zeros stay at the default.
            double plain = instance.value(i);
            if (plain != 0.0) {
                newValues[i] = plain;
            }
            continue;
        }

        if (instance.isMissing(i)) {
            newValues[i] = Utils.missingValue();
            continue;
        }

        // A string value must be added to the output attribute's value range first;
        // the stored value is then its new internal index.
        Attribute target = outputFormatPeek().attribute(i);
        if (target.numValues() == 0) {
            // Note that the first string value in a SparseInstance doesn't get printed.
            target.addStringValue("Hack to defeat SparseInstance bug");
        }
        newValues[i] = target.addStringValue(instance.stringValue(i));
    }

    for (int r = 0; r < regexFeatures.length; r++) {
        Matcher matcher = regexFeatures[r].getPattern().matcher(stringValue);
        int hits = 0;
        int longest = 0;
        while (matcher.find()) {
            hits++;
            longest = Math.max(longest, matcher.group().length());
        }

        int slot = numOldValues + slotsPerRegex * r;
        newValues[slot] = hits;
        if (countRegexLengths) {
            newValues[slot + 1] = longest;
        }
    }

    return new SparseInstance(instance.weight(), newValues);
}

From source file:etc.aloe.filters.StringToDictionaryVector.java

License:Open Source License

/**
 * Converts the instance without document-length normalization and appends the result
 * to {@code converted}.
 *
 * <p>Original attribute values are copied (string values are re-registered with the
 * output format), then one weight per matched dictionary term is written after them.
 * A term weight starts as the match count (or 1 when counts are not output) and is
 * optionally transformed by log(1 + tf) and by the IDF factor.
 *
 * @param instance  the instance to convert
 * @param converted the list that receives the converted instance
 * @return the document length, i.e. the Euclidean norm of the term weights
 * @throws IllegalStateException if the string attribute index is not valid
 */
private double convertInstancewoDocNorm(Instance instance, ArrayList<Instance> converted) {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute index not valid");
    }

    int numOldValues = instance.numAttributes();
    double[] newValues = new double[numOldValues + m_selectedTerms.size()];

    // Copy all attributes from input to output.
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (getInputFormat().attribute(i).type() != Attribute.STRING) {
            // Nominal and numeric values carry over unchanged; zeros stay at the default.
            double plain = instance.value(i);
            if (plain != 0.0) {
                newValues[i] = plain;
            }
            continue;
        }

        if (instance.isMissing(i)) {
            newValues[i] = Utils.missingValue();
            continue;
        }

        // A string value must be added to the output attribute's value range first;
        // the stored value is then its new internal index.
        Attribute target = outputFormatPeek().attribute(i);
        if (target.numValues() == 0) {
            // Note that the first string value in a SparseInstance doesn't get printed.
            target.addStringValue("Hack to defeat SparseInstance bug");
        }
        newValues[i] = target.addStringValue(instance.stringValue(i));
    }

    String stringValue = instance.stringValue(stringAttributeIndex);
    double squaredLength = 0;

    Map<String, Integer> termMatches = m_selectedTermsTrie.countNonoverlappingMatches(stringValue);
    for (Map.Entry<String, Integer> match : termMatches.entrySet()) {
        int termIdx = m_selectedTermIndices.get(match.getKey());
        double weight = match.getValue();
        if (weight <= 0) {
            continue;
        }

        if (!m_OutputCounts) {
            // Presence/absence only.
            weight = 1;
        }
        if (m_TFTransform) {
            weight = Math.log(weight + 1);
        }
        if (m_IDFTransform) {
            weight *= Math.log(m_NumInstances / (double) m_DocsCounts[termIdx]);
        }

        newValues[numOldValues + termIdx] = weight;
        squaredLength += weight * weight;
    }

    converted.add(new SparseInstance(instance.weight(), newValues));

    return Math.sqrt(squaredLength);
}

From source file:etc.aloe.filters.StringToDictionaryVector.java

License:Open Source License

/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes)./*from  ww w.  java 2  s .c om*/
 *
 * @param inst   the instance to normalize
 * @param double the document length
 * @throws Exception if avg. doc length not set
 */
private void normalizeInstance(Instance inst, double docLength) throws Exception {

    if (docLength == 0) {
        return;
    }

    int numOldValues = getInputFormat().numAttributes();

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Normalize document vector
    for (int j = numOldValues; j < inst.numAttributes(); j++) {
        double val = inst.value(j) * m_AvgDocLength / docLength;
        inst.setValue(j, val);
    }
}

From source file:etc.aloe.filters.WordFeaturesExtractor.java

License:Open Source License

/**
 * Produces a sparse copy of {@code instance} extended with word features.
 *
 * <p>The original attribute values are copied first (string values are re-registered
 * with the output format). Unless the selected attribute is missing, the document is
 * tokenized and the occurrence count of every unigram, followed by every bigram, is
 * appended after the original values.
 *
 * @param instance the instance to convert
 * @return a new {@code SparseInstance} carrying the input's weight and the extended values
 * @throws IllegalStateException if the selected attribute index has not been set
 */
@Override
protected Instance process(Instance instance) throws Exception {
    if (selectedAttributeIndex < 0) {
        throw new IllegalStateException("String attribute not set");
    }

    int numOldValues = instance.numAttributes();
    int numUnigrams = unigrams.size();
    double[] newValues = new double[numOldValues + numUnigrams + bigrams.size()];

    // Copy all attributes from input to output.
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (getInputFormat().attribute(i).type() != Attribute.STRING) {
            // Nominal and numeric values carry over unchanged; zeros stay at the default.
            double plain = instance.value(i);
            if (plain != 0.0) {
                newValues[i] = plain;
            }
            continue;
        }

        if (instance.isMissing(i)) {
            newValues[i] = Utils.missingValue();
            continue;
        }

        // A string value must be added to the output attribute's value range first;
        // the stored value is then its new internal index.
        Attribute target = outputFormatPeek().attribute(i);
        if (target.numValues() == 0) {
            // Note that the first string value in a SparseInstance doesn't get printed.
            target.addStringValue("Hack to defeat SparseInstance bug");
        }
        newValues[i] = target.addStringValue(instance.stringValue(i));
    }

    // The result is not used below; the lookup itself is retained as in the original.
    String stringValue = instance.stringValue(selectedAttributeIndex);
    if (!instance.isMissing(selectedAttributeIndex)) {

        List<String> words = tokenizeDocument(instance);
        Set<String> distinctWords = new HashSet<String>(words);

        for (int u = 0; u < numUnigrams; u++) {
            String unigram = unigrams.get(u);
            int occurrences = 0;
            // Only scan the document when the word occurs at all.
            if (distinctWords.contains(unigram)) {
                for (String word : words) {
                    if (word.equals(unigram)) {
                        occurrences++;
                    }
                }
            }
            newValues[numOldValues + u] = occurrences;
        }

        int bigramBase = numOldValues + unigrams.size();
        for (int b = 0; b < bigrams.size(); b++) {
            Bigram bigram = bigrams.get(b);
            newValues[bigramBase + b] = bigram.getTimesInDocument(words);
        }
    }

    return new SparseInstance(instance.weight(), newValues);
}

From source file:experimentshell.knn.java

/**
 * Classifies {@code instance} by majority vote among its {@code k} nearest training
 * instances in {@code data}, using the Manhattan (L1) distance.
 *
 * <p>Distances are kept per training instance, so several neighbours at the same
 * distance all take part in the vote. The class attribute is excluded from the distance
 * so the label being predicted cannot influence the neighbour search, and differences
 * are accumulated as doubles rather than truncated to integers.
 *
 * @param instance the instance to classify
 * @return the index of the majority class among the nearest neighbours (the lowest
 *         index wins ties; 0 if no neighbour is available)
 * @throws Exception declared by the overridden method
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
    final int numInstances = data.numInstances();
    final int numAttributes = data.numAttributes();
    final int classIndex = data.classIndex();

    // Manhattan distance from the query to every training instance.
    double[] distances = new double[numInstances];
    for (int i = 0; i < numInstances; i++) {
        double distance = 0.0;
        for (int j = 0; j < numAttributes; j++) {
            if (j == classIndex) {
                continue;
            }
            double diff = data.instance(i).value(j) - instance.value(j);
            // A NaN difference (e.g. a missing value) contributes nothing to the distance.
            if (!Double.isNaN(diff)) {
                distance += Math.abs(diff);
            }
        }
        distances[i] = distance;
    }

    // Repeatedly pick the closest unused instance and tally its class.
    int[] tally = new int[data.numClasses()];
    boolean[] used = new boolean[numInstances];
    int neighbours = Math.min(k, numInstances);
    for (int n = 0; n < neighbours; n++) {
        int nearest = -1;
        for (int i = 0; i < numInstances; i++) {
            if (!used[i] && (nearest < 0 || distances[i] < distances[nearest])) {
                nearest = i;
            }
        }
        used[nearest] = true;
        tally[(int) data.instance(nearest).classValue()]++;
    }

    // Majority class; the lowest index wins ties.
    int maxIndex = 0;
    for (int c = 1; c < tally.length; c++) {
        if (tally[c] > tally[maxIndex]) {
            maxIndex = c;
        }
    }

    return maxIndex;
}

From source file:expshell.KNN.java

/**
 * Classifies {@code instance} by majority vote among its {@code k} nearest training
 * instances in {@code data}, using the Manhattan (L1) distance.
 *
 * <p>The attribute loop is bounded by the attribute index, so every attribute is visited
 * exactly once per training instance. Distances are kept per training instance, so
 * neighbours at equal distance all take part in the vote. The class attribute is
 * excluded from the distance, and differences are accumulated as doubles rather than
 * truncated to integers.
 *
 * @param instance the instance to classify
 * @return the index of the majority class among the nearest neighbours (the lowest
 *         index wins ties; 0 if no neighbour is available)
 * @throws Exception declared by the overridden method
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
    final int numInstances = data.numInstances();
    final int numAttributes = data.numAttributes();
    final int classIndex = data.classIndex();

    // Manhattan distance from the query to every training instance.
    double[] distances = new double[numInstances];
    for (int i = 0; i < numInstances; i++) {
        double distance = 0.0;
        for (int j = 0; j < numAttributes; j++) {
            if (j == classIndex) {
                continue;
            }
            double diff = data.instance(i).value(j) - instance.value(j);
            // A NaN difference (e.g. a missing value) contributes nothing to the distance.
            if (!Double.isNaN(diff)) {
                distance += Math.abs(diff);
            }
        }
        distances[i] = distance;
    }

    // Repeatedly pick the closest unused instance and tally its class.
    int[] tally = new int[data.numClasses()];
    boolean[] used = new boolean[numInstances];
    int neighbours = Math.min(k, numInstances);
    for (int n = 0; n < neighbours; n++) {
        int nearest = -1;
        for (int i = 0; i < numInstances; i++) {
            if (!used[i] && (nearest < 0 || distances[i] < distances[nearest])) {
                nearest = i;
            }
        }
        used[nearest] = true;
        tally[(int) data.instance(nearest).classValue()]++;
    }

    // Majority class; the lowest index wins ties.
    int maxIndex = 0;
    for (int c = 1; c < tally.length; c++) {
        if (tally[c] > tally[maxIndex]) {
            maxIndex = c;
        }
    }
    return maxIndex;
}

From source file:expshell.Neural.java

/**
 * Computes this unit's output for {@code ins}: the weighted sum of the instance's
 * values with {@code weights.get(0)} applied to a constant input of -1, passed through
 * {@code threshold}. The instance's values are also stored in {@code values}.
 *
 * @param ins the instance supplying the input values
 * @return the result of {@code threshold} applied to the weighted sum
 */
public double cal(Instance ins) {
    // weights.get(0) acts on a fixed input of -1.
    double activation = 0.0 - weights.get(0);
    for (int input = 0; input + 1 < weights.size(); input++) {
        activation += ins.value(input) * weights.get(input + 1);
    }

    // Snapshot of the inputs seen by this call.
    // NOTE(review): the count comes from numValues() while reads go through value(i);
    // these presumably agree for dense instances only - confirm.
    List<Double> inputs = new ArrayList<Double>();
    int stored = 0;
    while (stored < ins.numValues()) {
        inputs.add(ins.value(stored));
        stored++;
    }
    values = inputs;

    return threshold(activation);
}