Example usage for weka.core Instance value

List of usage examples for weka.core Instance value

Introduction

On this page you can find usage examples for the value method of weka.core.Instance.

Prototype

public double value(Attribute att);

Source Link

Document

Returns an instance's attribute value in internal format.

Usage

From source file:es.jarias.FMC.FMC.java

License:Open Source License

/**
 * Estimates the pairwise mutual information (natural log) between the attributes
 * selected by {@code indexes}, using value co-occurrence counts taken from {@code data}.
 *
 * <p>Attribute values are used directly as array indexes, so the selected attributes
 * are presumably nominal -- confirm against callers.
 *
 * @param data    the instances to count over
 * @param indexes attribute indexes (into {@code data}) to compare with each other
 * @return a square matrix whose entry {@code [a][b]} is the estimated mutual information
 *         between attributes {@code indexes[a]} and {@code indexes[b]}; the diagonal
 *         is left at zero
 */
public static double[][] mutualInfo(Instances data, int[] indexes) {
    final int n = indexes.length;

    // Number of distinct values per selected attribute, plus marginal count tables.
    double[] cardinality = new double[n];
    double[][] marginal = new double[n][];
    for (int a = 0; a < n; a++) {
        cardinality[a] = data.attribute(indexes[a]).numValues();
        marginal[a] = new double[(int) cardinality[a]];
    }

    // Joint count tables for every ordered pair of distinct attributes.
    double[][][] joint = new double[n][n][];
    for (int a = 0; a < n; a++) {
        for (int b = 0; b < n; b++) {
            if (a == b) {
                continue;
            }
            joint[a][b] = new double[(int) (cardinality[a] * cardinality[b])];
        }
    }

    // Accumulate marginal and joint counts; the joint cell is addressed as
    // (value of b) * (cardinality of a) + (value of a).
    for (Instance row : data) {
        for (int a = 0; a < n; a++) {
            double va = row.value(indexes[a]);
            marginal[a][(int) va]++;
            for (int b = 0; b < n; b++) {
                if (a == b) {
                    continue;
                }
                joint[a][b][(int) (row.value(indexes[b]) * cardinality[a] + va)]++;
            }
        }
    }

    // MI(X_a; X_b) = (1/N) * sum over value pairs of count * log(N * count / (count_a * count_b)).
    final double total = data.numInstances();
    double[][] result = new double[n][n];
    for (int a = 0; a < n; a++) {
        for (int b = 0; b < n; b++) {
            if (a == b) {
                continue;
            }
            double sum = 0.0;
            for (int va = 0; va < cardinality[a]; va++) {
                for (int vb = 0; vb < cardinality[b]; vb++) {
                    double pairCount = joint[a][b][(int) (vb * cardinality[a] + va)];
                    double ratio = (total * pairCount) / (marginal[a][va] * marginal[b][vb]);
                    // A zero or NaN ratio (empty cell or empty marginal) contributes nothing.
                    if (ratio > 0) {
                        sum += pairCount * Math.log(ratio);
                    }
                }
            }
            result[a][b] = sum / total;
        }
    }

    return result;
}

From source file:es.jarias.FMC.HITON.java

License:Open Source License

/**
 * Creates the HITON state for {@code data}: stores the dataset, counts single and
 * pairwise attribute values, fills the pairwise mutual-information matrix {@code I},
 * and finally computes {@code PC1} and {@code PC} for every attribute.
 *
 * <p>Attribute values are used directly as array indexes, so all attributes are
 * presumably nominal -- confirm against callers.
 *
 * @param data the dataset to analyse
 */
public HITON(Instances data) {
    dataset = data;
    m_numAttributes = dataset.numAttributes();
    m_numCases = dataset.numInstances();

    // Value cardinalities and marginal count tables, one per attribute.
    m_numValues = new int[m_numAttributes];
    m_counts = new double[m_numAttributes][];
    for (int a = 0; a < m_numAttributes; a++) {
        m_numValues[a] = dataset.attribute(a).numValues();
        m_counts[a] = new double[m_numValues[a]];
    }

    // Joint count tables for both orderings of every attribute pair.
    m_condiCounts = new double[m_numAttributes][m_numAttributes][];
    for (int a = 0; a < m_numAttributes; a++) {
        for (int b = a + 1; b < m_numAttributes; b++) {
            int cells = m_numValues[a] * m_numValues[b];
            m_condiCounts[a][b] = new double[cells];
            m_condiCounts[b][a] = new double[cells];
        }
    }

    I = new double[m_numAttributes][m_numAttributes];

    // Accumulate counts; table [a][b] is addressed as (value of a) * |b| + (value of b).
    for (Instance row : dataset) {
        for (int a = 0; a < m_numAttributes; a++) {
            double va = row.value(a);
            m_counts[a][(int) va]++;

            for (int b = a + 1; b < m_numAttributes; b++) {
                double vb = row.value(b);
                m_condiCounts[a][b][(int) (va * m_numValues[b] + vb)]++;
                m_condiCounts[b][a][(int) (vb * m_numValues[a] + va)]++;
            }
        }
    }

    // I(X_i; X_j) for every ordered pair of distinct attributes.
    for (int i = 0; i < m_numAttributes; i++) {
        for (int j = 0; j < m_numAttributes; j++) {
            if (i != j) {
                double[] joint = m_condiCounts[i][j];
                double sum = 0.0;
                for (int vi = 0; vi < m_numValues[i]; vi++) {
                    for (int vj = 0; vj < m_numValues[j]; vj++) {
                        double pairCount = joint[vi * m_numValues[j] + vj];
                        double ratio = (1.0 * m_numCases * pairCount)
                                / (m_counts[i][vi] * m_counts[j][vj]);
                        // A zero or NaN ratio (empty cell or empty marginal) contributes nothing.
                        if (ratio > 0) {
                            sum += pairCount * Math.log(ratio);
                        }
                    }
                }
                I[i][j] = sum / m_numCases;
            }
        }
    }

    // First pass for every attribute, then the second pass for every attribute.
    PC1 = new ArrayList[m_numAttributes];
    for (int att = 0; att < m_numAttributes; att++) {
        PC1[att] = HITONPC1(att);
    }

    PC = new HashSet[m_numAttributes];
    for (int att = 0; att < m_numAttributes; att++) {
        PC[att] = HITONPC2(att);
    }
}

From source file:es.jarias.FMC.HITON.java

License:Open Source License

/**
 * Estimates the conditional mutual information I(X;Y|Z) (natural log) from the
 * value counts in {@code dataset}.
 *
 * <p>An empty conditioning set is supported: all instances then fall into a single
 * Z configuration and the result reduces to the unconditional I(X;Y). Previously an
 * empty {@code Z} raised {@code ArrayIndexOutOfBoundsException}.
 *
 * @param X index of the first attribute
 * @param Y index of the second attribute
 * @param Z indexes of the conditioning attributes; may be empty
 * @return the estimated I(X;Y|Z)
 */
public double computeConditionalMI(int X, int Y, Integer[] Z) {

    // Mixed-radix encoding of a joint Z configuration: the last conditioning attribute
    // is the least significant digit (offset 1), each earlier one is scaled by the
    // product of the cardinalities that follow it.
    int Zcardinality = 1;
    int[] offsets = new int[Z.length];
    for (int z = Z.length - 1; z >= 0; z--) {
        offsets[z] = Zcardinality;
        Zcardinality *= m_numValues[Z[z]];
    }

    double[][][] conditional2Counts = new double[m_numValues[X]][m_numValues[Y]][Zcardinality];
    double[][] conditionalXCounts = new double[m_numValues[X]][Zcardinality];
    double[][] conditionalYCounts = new double[m_numValues[Y]][Zcardinality];
    double[] Zcounts = new double[Zcardinality];

    for (Instance inst : dataset) {
        // With no conditioning attributes the loop is skipped and index stays 0.
        int index = 0;
        for (int z = 0; z < Z.length; z++) {
            // Compound assignment narrows the double product back to int.
            index += inst.value(Z[z]) * offsets[z];
        }

        int xValue = (int) inst.value(X);
        int yValue = (int) inst.value(Y);

        Zcounts[index]++;
        conditionalXCounts[xValue][index]++;
        conditionalYCounts[yValue][index]++;
        conditional2Counts[xValue][yValue][index]++;
    }

    // I(X;Y|Z) = sum_z p(z) * sum_{x,y} p(x,y|z) * log( p(x,y|z) / (p(x|z) * p(y|z)) )
    double mi = 0.0;
    for (int v_z = 0; v_z < Zcardinality; v_z++) {
        double zCount = Zcounts[v_z];
        if (zCount == 0) {
            // Unobserved configuration: p(z) is 0 and every term would be NaN, so it adds nothing.
            continue;
        }
        double p_z = zCount / m_numCases;
        double t_mi = 0.0;
        for (int v_x = 0; v_x < m_numValues[X]; v_x++) {
            double p_x = conditionalXCounts[v_x][v_z] / zCount;
            for (int v_y = 0; v_y < m_numValues[Y]; v_y++) {
                double p_y = conditionalYCounts[v_y][v_z] / zCount;
                double p_xy = conditional2Counts[v_x][v_y][v_z] / zCount;
                double ratio = p_xy / (p_x * p_y);
                // A zero or NaN ratio (empty cell or empty marginal) contributes nothing.
                if (ratio > 0) {
                    t_mi += p_xy * Math.log(ratio);
                }
            }
        }
        mi += p_z * t_mi;
    }

    return mi;
}

From source file:etc.aloe.filters.AbstractRegexFilter.java

License:Open Source License

/**
 * Builds a new sparse instance consisting of the input attributes followed by one
 * match count per regex feature and, when {@code countRegexLengths} is set, the
 * length of the longest match right after each count.
 *
 * @param instance the instance to convert
 * @return the converted instance, carrying the weight of {@code instance}
 * @throws IllegalStateException if the string attribute index has not been set
 */
@Override
protected Instance process(Instance instance) throws Exception {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute not set");
    }

    String text = instance.stringValue(stringAttributeIndex);
    NamedRegex[] regexFeatures = getRegexFeatures();

    int numOldValues = instance.numAttributes();
    // Each regex occupies one slot (count) or two (count, longest match length).
    int slotsPerRegex = countRegexLengths ? 2 : 1;
    double[] newValues = new double[numOldValues + regexFeatures.length * slotsPerRegex];

    // Carry the input attributes over to the output.
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        boolean isString = getInputFormat().attribute(i).type() == Attribute.STRING;
        if (!isString) {
            // Nominal and numeric values are copied as they are.
            double value = instance.value(i);
            if (value != 0.0) {
                newValues[i] = value;
            }
            continue;
        }
        if (instance.isMissing(i)) {
            newValues[i] = Utils.missingValue();
            continue;
        }

        // A string value must be registered with the output attribute first;
        // what gets stored is the index it was registered under.
        Attribute outputAttribute = outputFormatPeek().attribute(i);
        if (outputAttribute.numValues() == 0) {
            // Note that the first string value in a
            // SparseInstance doesn't get printed.
            outputAttribute.addStringValue("Hack to defeat SparseInstance bug");
        }
        newValues[i] = outputAttribute.addStringValue(instance.stringValue(i));
    }

    // Append the regex-derived features.
    for (int r = 0; r < regexFeatures.length; r++) {
        Matcher matcher = regexFeatures[r].getPattern().matcher(text);
        int hits = 0;
        int longest = 0;
        while (matcher.find()) {
            hits++;
            longest = Math.max(longest, matcher.group().length());
        }

        int slot = numOldValues + slotsPerRegex * r;
        newValues[slot] = hits;
        if (countRegexLengths) {
            newValues[slot + 1] = longest;
        }
    }

    return new SparseInstance(instance.weight(), newValues);
}

From source file:etc.aloe.filters.StringToDictionaryVector.java

License:Open Source License

/**
 * Converts the instance without document-length normalization and appends the
 * result to {@code converted}.
 *
 * @param instance  the instance to convert
 * @param converted the list that receives the converted instance
 * @return the document length, i.e. the Euclidean norm of the newly created
 *         term values
 * @throws IllegalStateException if the string attribute index is not valid
 */
private double convertInstancewoDocNorm(Instance instance, ArrayList<Instance> converted) {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute index not valid");
    }

    int numOldValues = instance.numAttributes();
    double[] newValues = new double[numOldValues + m_selectedTerms.size()];

    // Carry the input attributes over to the output.
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        boolean isString = getInputFormat().attribute(i).type() == Attribute.STRING;
        if (!isString) {
            // Nominal and numeric values are copied as they are.
            double value = instance.value(i);
            if (value != 0.0) {
                newValues[i] = value;
            }
            continue;
        }
        if (instance.isMissing(i)) {
            newValues[i] = Utils.missingValue();
            continue;
        }

        // A string value must be registered with the output attribute first;
        // what gets stored is the index it was registered under.
        Attribute outputAttribute = outputFormatPeek().attribute(i);
        if (outputAttribute.numValues() == 0) {
            // Note that the first string value in a
            // SparseInstance doesn't get printed.
            outputAttribute.addStringValue("Hack to defeat SparseInstance bug");
        }
        newValues[i] = outputAttribute.addStringValue(instance.stringValue(i));
    }

    String text = instance.stringValue(stringAttributeIndex);
    double squaredLength = 0;

    Map<String, Integer> termMatches = m_selectedTermsTrie.countNonoverlappingMatches(text);
    for (Map.Entry<String, Integer> entry : termMatches.entrySet()) {
        int termIdx = m_selectedTermIndices.get(entry.getKey());
        double weight = entry.getValue();
        if (weight <= 0) {
            continue;
        }

        // Presence/absence output collapses any positive count to 1.
        if (!m_OutputCounts) {
            weight = 1;
        }
        if (m_TFTransform) {
            weight = Math.log(weight + 1);
        }
        if (m_IDFTransform) {
            weight *= Math.log(m_NumInstances / (double) m_DocsCounts[termIdx]);
        }

        newValues[numOldValues + termIdx] = weight;
        squaredLength += weight * weight;
    }

    converted.add(new SparseInstance(instance.weight(), newValues));

    return Math.sqrt(squaredLength);
}

From source file:etc.aloe.filters.StringToDictionaryVector.java

License:Open Source License

/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes)./*from  ww w.  java 2  s .c om*/
 *
 * @param inst   the instance to normalize
 * @param double the document length
 * @throws Exception if avg. doc length not set
 */
private void normalizeInstance(Instance inst, double docLength) throws Exception {

    if (docLength == 0) {
        return;
    }

    int numOldValues = getInputFormat().numAttributes();

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Normalize document vector
    for (int j = numOldValues; j < inst.numAttributes(); j++) {
        double val = inst.value(j) * m_AvgDocLength / docLength;
        inst.setValue(j, val);
    }
}

From source file:etc.aloe.filters.WordFeaturesExtractor.java

License:Open Source License

/**
 * Builds a new sparse instance consisting of the input attributes followed by one
 * occurrence count per unigram and then one per bigram. When the selected attribute
 * is missing, all word features stay at zero.
 *
 * @param instance the instance to convert
 * @return the converted instance, carrying the weight of {@code instance}
 * @throws IllegalStateException if the selected attribute index has not been set
 */
@Override
protected Instance process(Instance instance) throws Exception {
    if (selectedAttributeIndex < 0) {
        throw new IllegalStateException("String attribute not set");
    }

    int numOldValues = instance.numAttributes();
    int unigramTotal = unigrams.size();
    double[] newValues = new double[numOldValues + unigramTotal + bigrams.size()];

    // Carry the input attributes over to the output.
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        boolean isString = getInputFormat().attribute(i).type() == Attribute.STRING;
        if (!isString) {
            // Nominal and numeric values are copied as they are.
            double value = instance.value(i);
            if (value != 0.0) {
                newValues[i] = value;
            }
            continue;
        }
        if (instance.isMissing(i)) {
            newValues[i] = Utils.missingValue();
            continue;
        }

        // A string value must be registered with the output attribute first;
        // what gets stored is the index it was registered under.
        Attribute outputAttribute = outputFormatPeek().attribute(i);
        if (outputAttribute.numValues() == 0) {
            // Note that the first string value in a
            // SparseInstance doesn't get printed.
            outputAttribute.addStringValue("Hack to defeat SparseInstance bug");
        }
        newValues[i] = outputAttribute.addStringValue(instance.stringValue(i));
    }

    // The returned text is not used here; the lookup is kept so behaviour is unchanged.
    instance.stringValue(selectedAttributeIndex);

    if (!instance.isMissing(selectedAttributeIndex)) {
        List<String> words = tokenizeDocument(instance);
        Set<String> distinctWords = new HashSet<String>(words);

        // Unigram features: number of occurrences of each known word.
        for (int u = 0; u < unigramTotal; u++) {
            String unigram = unigrams.get(u);
            int occurrences = 0;
            // The set lookup skips the scan for words that never occur.
            if (distinctWords.contains(unigram)) {
                for (String word : words) {
                    if (word.equals(unigram)) {
                        occurrences++;
                    }
                }
            }
            newValues[numOldValues + u] = occurrences;
        }

        // Bigram features follow directly after the unigram features.
        int bigramBase = numOldValues + unigramTotal;
        for (int b = 0; b < bigrams.size(); b++) {
            Bigram bigram = bigrams.get(b);
            newValues[bigramBase + b] = bigram.getTimesInDocument(words);
        }
    }

    return new SparseInstance(instance.weight(), newValues);
}

From source file:experimentshell.knn.java

/**
 * Classifies {@code instance} by majority vote among the {@code k} training
 * instances in {@code data} that are closest under a Manhattan-style distance.
 *
 * <p>Training instances that share the same distance are all kept as candidates.
 * Earlier versions keyed a map by distance alone, so equally distant instances
 * overwrote each other and fewer than {@code k} real neighbours could vote.
 *
 * <p>NOTE(review): each per-attribute difference is truncated to {@code int} before
 * it is summed, and the loop runs over every attribute index of {@code data}. Both
 * are kept from the original -- confirm they are intended.
 *
 * @param instance the instance to classify
 * @return the index of the class with the most votes (lowest index wins ties)
 * @throws Exception declared by the overridden method
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
    // Distance -> class values of all training instances at that distance.
    // The TreeMap keeps the distances sorted in ascending order.
    Map<Integer, java.util.List<Double>> classesByDistance = new TreeMap<>();

    for (int i = 0; i < data.numInstances(); i++) {
        Instance candidate = data.instance(i);
        int distance = 0;
        for (int j = 0; j < data.numAttributes(); j++) {
            // Manhattan distance on truncated differences.
            distance += Math.abs((int) (candidate.value(j) - instance.value(j)));
        }

        java.util.List<Double> bucket = classesByDistance.get(distance);
        if (bucket == null) {
            bucket = new java.util.ArrayList<>();
            classesByDistance.put(distance, bucket);
        }
        bucket.add(candidate.classValue());
    }

    // Let the k nearest instances vote, walking the buckets from nearest to farthest.
    int[] tally = new int[data.numClasses()];
    int count = 0;
    for (java.util.List<Double> bucket : classesByDistance.values()) {
        if (count >= k) {
            break;
        }
        for (double classValue : bucket) {
            if (count >= k) {
                break;
            }
            tally[(int) classValue]++;
            count++;
        }
    }

    // Pick the class with the most votes.
    int maxIndex = 0;
    for (int c = 0; c < data.numClasses(); c++) {
        if (tally[c] > tally[maxIndex]) {
            maxIndex = c;
        }
    }

    return maxIndex;
}

From source file:expshell.KNN.java

/**
 * Classifies {@code instance} by majority vote among the {@code k} training
 * instances in {@code data} that are closest under a Manhattan-style distance.
 *
 * <p>Two defects of the earlier version are fixed here: the attribute loop tested
 * the instance counter instead of the attribute counter, so it ran past the last
 * attribute; and the distance map was keyed by distance alone, so equally distant
 * training instances overwrote each other.
 *
 * <p>NOTE(review): each per-attribute difference is truncated to {@code int} before
 * it is summed, and the loop runs over every attribute index of {@code data}. Both
 * are kept from the original -- confirm they are intended.
 *
 * @param instance the instance to classify
 * @return the index of the class with the most votes (lowest index wins ties)
 * @throws Exception declared by the overridden method
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
    // Distance -> class values of all training instances at that distance.
    // The TreeMap keeps the distances sorted in ascending order.
    Map<Integer, java.util.List<Double>> classesByDistance = new TreeMap<>();

    for (int i = 0; i < data.numInstances(); i++) {
        Instance candidate = data.instance(i);
        int distance = 0;
        // The bound is checked on j, the attribute counter.
        for (int j = 0; j < data.numAttributes(); j++) {
            // Manhattan distance on truncated differences.
            distance += Math.abs((int) (candidate.value(j) - instance.value(j)));
        }

        java.util.List<Double> bucket = classesByDistance.get(distance);
        if (bucket == null) {
            bucket = new java.util.ArrayList<>();
            classesByDistance.put(distance, bucket);
        }
        bucket.add(candidate.classValue());
    }

    // Let the k nearest instances vote, walking the buckets from nearest to farthest.
    int[] tally = new int[data.numClasses()];
    int count = 0;
    for (java.util.List<Double> bucket : classesByDistance.values()) {
        if (count >= k) {
            break;
        }
        for (double classValue : bucket) {
            if (count >= k) {
                break;
            }
            tally[(int) classValue]++;
            count++;
        }
    }

    // Pick the class with the most votes.
    int maxIndex = 0;
    for (int c = 0; c < data.numClasses(); c++) {
        if (tally[c] > tally[maxIndex]) {
            maxIndex = c;
        }
    }
    return maxIndex;
}

From source file:expshell.Neural.java

/**
 * Computes the weighted sum of the instance's values, stores a copy of the
 * instance's values in {@code values}, and returns the thresholded sum.
 *
 * <p>{@code weights.get(0)} is paired with a constant input of -1; every other
 * weight at position {@code w} is paired with the instance value at {@code w - 1}.
 *
 * @param ins the instance to evaluate
 * @return the result of {@code threshold} applied to the weighted sum
 */
public double cal(Instance ins) {
    // Contribution of the constant -1 input.
    double sum = 0;
    sum -= weights.get(0);

    // Remaining weights line up with the instance values, shifted by one.
    for (int att = 0; att + 1 < weights.size(); att++) {
        sum += ins.value(att) * weights.get(att + 1);
    }

    // Remember the values that were fed in.
    List<Double> snapshot = new ArrayList<Double>();
    int stored = ins.numValues();
    for (int v = 0; v < stored; v++) {
        snapshot.add(ins.value(v));
    }
    values = snapshot;

    return threshold(sum);
}