Example usage for weka.core Instance isMissing

Introduction

In this page you can find the example usage for weka.core Instance isMissing.

Prototype

public boolean isMissing(Attribute att);

Source Link

Document

Tests if a specific value is "missing".

Usage

From source file:WLSVM.java

License:Open Source License

/**
 * Converts an ARFF Instance into a string in the sparse format accepted by
 * LIBSVM/*from  w  w w  .  j a va2 s  .  c o  m*/
 * 
 * @param instance
 * @return
 */
protected String InstanceToSparse(Instance instance) {
    String line = new String();
    int c = (int) instance.classValue();
    if (c == 0)
        c = -1;
    line = c + " ";
    for (int j = 1; j < instance.numAttributes(); j++) {
        if (j - 1 == instance.classIndex()) {
            continue;
        }
        if (instance.isMissing(j - 1))
            continue;
        if (instance.value(j - 1) != 0)
            line += " " + j + ":" + instance.value(j - 1);
    }
    // System.out.println(line); 
    return (line + "\n");
}

From source file:REPTree.java

License:Open Source License

/**
 * Builds classifier./* w  w w .j a  v a2 s . c  o m*/
 * 
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);

    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Randomize and stratify
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Split data into training and pruning set
    Instances train = null;
    Instances prune = null;
    if (!m_NoPruning) {
        train = data.trainCV(m_NumFolds, 0, random);
        prune = data.testCV(m_NumFolds, 0);
    } else {
        train = data;
    }

    // Create array of sorted indices and weights
    int[][][] sortedIndices = new int[1][train.numAttributes()][0];
    double[][][] weights = new double[1][train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
        if (j != train.classIndex()) {
            weights[0][j] = new double[train.numInstances()];
            if (train.attribute(j).isNominal()) {

                // Handling nominal attributes. Putting indices of
                // instances with missing values at the end.
                sortedIndices[0][j] = new int[train.numInstances()];
                int count = 0;
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (!inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
            } else {

                // Sorted indices are computed for numeric attributes
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    vals[i] = inst.value(j);
                }
                sortedIndices[0][j] = Utils.sort(vals);
                for (int i = 0; i < train.numInstances(); i++) {
                    weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight();
                }
            }
        }
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    double totalWeight = 0, totalSumSquared = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }
    m_Tree = new Tree();
    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Build tree
    m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum,
            m_MinVarianceProp * trainVariance, 0, m_MaxDepth);

    // Insert pruning data and perform reduced error pruning
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(prune);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}

From source file:REPRandomTree.java

License:Open Source License

/**
 * Builds classifier.//  ww  w.  ja  v a2 s. co  m
 * 
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);

    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Randomize and stratify
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Split data into training and pruning set
    Instances train = null;
    Instances prune = null;
    if (!m_NoPruning) {
        train = data.trainCV(m_NumFolds, 0, random);
        prune = data.testCV(m_NumFolds, 0);
    } else {
        train = data;
    }

    // Create array of sorted indices and weights
    int[][][] sortedIndices = new int[1][train.numAttributes()][0];
    double[][][] weights = new double[1][train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
        if (j != train.classIndex()) {
            weights[0][j] = new double[train.numInstances()];
            if (train.attribute(j).isNominal()) {

                // Handling nominal attributes. Putting indices of
                // instances with missing values at the end.
                sortedIndices[0][j] = new int[train.numInstances()];
                int count = 0;
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (!inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
            } else {

                // Sorted indices are computed for numeric attributes
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    vals[i] = inst.value(j);
                }
                sortedIndices[0][j] = Utils.sort(vals);
                for (int i = 0; i < train.numInstances(); i++) {
                    weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight();
                }
            }
        }
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    double totalWeight = 0, totalSumSquared = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }
    m_Tree = new Tree();
    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Build tree
    m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum,
            m_MinVarianceProp * trainVariance, 0, m_MaxDepth, m_FeatureFrac, random);

    // Insert pruning data and perform reduced error pruning
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(prune);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}

From source file:GainRatioAttributeEval1.java

License:Open Source License

/**
 * evaluates an individual attribute by measuring the gain ratio
 * of the class given the attribute.//from  ww  w. ja va  2  s.co  m
 *
 * @param attribute the index of the attribute to be evaluated
 * @return the gain ratio
 * @throws Exception if the attribute could not be evaluated
 */
public double evaluateAttribute(int attribute) throws Exception {
    int i, j, ii, jj;
    int ni, nj;
    double sum = 0.0;
    ni = m_trainInstances.attribute(attribute).numValues() + 1;
    nj = m_numClasses + 1;
    double[] sumi, sumj;
    Instance inst;
    double temp = 0.0;
    sumi = new double[ni];
    sumj = new double[nj];
    double[][] counts = new double[ni][nj];
    sumi = new double[ni];
    sumj = new double[nj];

    for (i = 0; i < ni; i++) {
        sumi[i] = 0.0;

        for (j = 0; j < nj; j++) {
            sumj[j] = 0.0;
            counts[i][j] = 0.0;
        }
    }

    // Fill the contingency table
    for (i = 0; i < m_numInstances; i++) {
        inst = m_trainInstances.instance(i);

        if (inst.isMissing(attribute)) {
            ii = ni - 1;
        } else {
            ii = (int) inst.value(attribute);
        }

        if (inst.isMissing(m_classIndex)) {
            jj = nj - 1;
        } else {
            jj = (int) inst.value(m_classIndex);
        }

        counts[ii][jj]++;
    }

    // get the row totals
    for (i = 0; i < ni; i++) {
        sumi[i] = 0.0;

        for (j = 0; j < nj; j++) {
            sumi[i] += counts[i][j];
            sum += counts[i][j];
        }
    }

    // get the column totals
    for (j = 0; j < nj; j++) {
        sumj[j] = 0.0;

        for (i = 0; i < ni; i++) {
            sumj[j] += counts[i][j];
        }
    }

    // distribute missing counts
    if (m_missing_merge && (sumi[ni - 1] < m_numInstances) && (sumj[nj - 1] < m_numInstances)) {
        double[] i_copy = new double[sumi.length];
        double[] j_copy = new double[sumj.length];
        double[][] counts_copy = new double[sumi.length][sumj.length];

        for (i = 0; i < ni; i++) {
            System.arraycopy(counts[i], 0, counts_copy[i], 0, sumj.length);
        }

        System.arraycopy(sumi, 0, i_copy, 0, sumi.length);
        System.arraycopy(sumj, 0, j_copy, 0, sumj.length);
        double total_missing = (sumi[ni - 1] + sumj[nj - 1] - counts[ni - 1][nj - 1]);

        // do the missing i's
        if (sumi[ni - 1] > 0.0) {
            for (j = 0; j < nj - 1; j++) {
                if (counts[ni - 1][j] > 0.0) {
                    for (i = 0; i < ni - 1; i++) {
                        temp = ((i_copy[i] / (sum - i_copy[ni - 1])) * counts[ni - 1][j]);
                        counts[i][j] += temp;
                        sumi[i] += temp;
                    }

                    counts[ni - 1][j] = 0.0;
                }
            }
        }

        sumi[ni - 1] = 0.0;

        // do the missing j's
        if (sumj[nj - 1] > 0.0) {
            for (i = 0; i < ni - 1; i++) {
                if (counts[i][nj - 1] > 0.0) {
                    for (j = 0; j < nj - 1; j++) {
                        temp = ((j_copy[j] / (sum - j_copy[nj - 1])) * counts[i][nj - 1]);
                        counts[i][j] += temp;
                        sumj[j] += temp;
                    }

                    counts[i][nj - 1] = 0.0;
                }
            }
        }

        sumj[nj - 1] = 0.0;

        // do the both missing
        if (counts[ni - 1][nj - 1] > 0.0 && total_missing != sum) {
            for (i = 0; i < ni - 1; i++) {
                for (j = 0; j < nj - 1; j++) {
                    temp = (counts_copy[i][j] / (sum - total_missing)) * counts_copy[ni - 1][nj - 1];
                    counts[i][j] += temp;
                    sumi[i] += temp;
                    sumj[j] += temp;
                }
            }

            counts[ni - 1][nj - 1] = 0.0;
        }
    }

    return ContingencyTables.gainRatio(counts);
}

From source file:ID3Chi.java

License:Open Source License

private double[] classifyInstanceWithToken(Instance instance, double token) {

    int numClasses = instance.numClasses();
    double[] tokenDistribution = new double[numClasses];
    if (m_Attribute == null) {
        for (int j = 0; j < numClasses; j++) {
            tokenDistribution[j] = token * m_Distribution[j];
        }//w ww .  ja  va2  s. c  o  m
    } else {
        // for attribute values get token distribution
        if (instance.isMissing(m_Attribute)) {
            for (int j = 0; j < m_Attribute.numValues(); j++) {
                double[] dist = m_Successors[j].classifyInstanceWithToken(instance,
                        token * m_Successors[j].m_Ratio);
                for (int i = 0; i < numClasses; i++) {
                    tokenDistribution[i] += dist[i];
                }
            }
        } else {
            int idx = (int) instance.value(m_Attribute);
            tokenDistribution = m_Successors[idx].classifyInstanceWithToken(instance,
                    token * m_Successors[idx].m_Ratio);
        }
    }

    return tokenDistribution;
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * @param data//from   ww  w . j  av a 2  s .c o  m
 *            the data which is to be split
 * @param att
 *            the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {

    // [att.numValues()] is location for "unknown" values
    Instances[] subset = new Instances[att.numValues() + 1];
    for (int j = 0; j <= att.numValues(); j++) {
        subset[j] = new Instances(data, data.numInstances());
    }

    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (inst.isMissing(att)) {
            subset[att.numValues()].add(inst);
        } else {
            subset[(int) inst.value(att)].add(inst);
        }
    }
    for (int i = 0; i < subset.length; i++) {
        subset[i].compactify();
    }
    return subset;
}

From source file:adams.data.conversion.WekaInstancesToTimeseries.java

License:Open Source License

/**
 * Performs the actual conversion./*from w  w w  . jav  a2  s.c o m*/
 *
 * @return      the converted data
 * @throws Exception   if something goes wrong with the conversion
 */
@Override
protected Object doConvert() throws Exception {
    Timeseries result;
    Instances input;
    Instance inst;
    int indexDate;
    int indexValue;
    TimeseriesPoint point;
    int i;
    Date timestamp;
    double value;

    input = (Instances) m_Input;

    // determine attribute indices
    m_DateAttribute.setData(input);
    indexDate = m_DateAttribute.getIntIndex();
    if (indexDate == -1)
        throw new IllegalStateException("Failed to located date attribute: " + m_DateAttribute.getIndex());
    m_ValueAttribute.setData(input);
    indexValue = m_ValueAttribute.getIntIndex();
    if (indexValue == -1)
        throw new IllegalStateException("Failed to located value attribute: " + m_ValueAttribute.getIndex());

    result = new Timeseries(input.relationName() + "-" + input.attribute(indexValue).name());
    for (i = 0; i < input.numInstances(); i++) {
        inst = input.instance(i);
        if (!inst.isMissing(indexDate) && !inst.isMissing(indexValue)) {
            timestamp = new Date((long) inst.value(indexDate));
            value = inst.value(indexValue);
            point = new TimeseriesPoint(timestamp, value);
            result.add(point);
        }
    }

    return result;
}

From source file:adams.data.instances.InstanceComparator.java

License:Open Source License

/**
 * Compares its two arguments for order.  Returns a negative integer,
 * zero, or a positive integer as the first argument is less than, equal
 * to, or greater than the second.//  w w w . j av a2 s .c o  m
 *
 * @param o1 the first object to be compared.
 * @param o2 the second object to be compared.
 * @return a negative integer, zero, or a positive integer as the
 *           first argument is less than, equal to, or greater than the
 *          second.
 */
@Override
public int compare(Instance o1, Instance o2) {
    int result;
    Instances header;
    int i;
    int weight;
    double d1;
    double d2;

    result = 0;
    header = o1.dataset();
    i = 0;
    while ((result == 0) && (i < m_Indices.length)) {
        if (o1.isMissing(m_Indices[i]) && o2.isMissing(m_Indices[i]))
            result = 0;
        else if (o1.isMissing(m_Indices[i]))
            result = -1;
        else if (o2.isMissing(m_Indices[i]))
            result = +1;
        else if (header.attribute(m_Indices[i]).isNumeric()) {
            d1 = o1.value(m_Indices[i]);
            d2 = o2.value(m_Indices[i]);
            if (d1 < d2)
                result = -1;
            else if (d1 == d2)
                result = 0;
            else
                result = +1;
        } else {
            result = o1.stringValue(m_Indices[i]).compareTo(o2.stringValue(m_Indices[i]));
        }

        if (!m_Ascending[i])
            result = -result;

        // add weight to index
        weight = (int) Math.pow(10, (m_Indices.length - i));
        result *= weight;

        i++;
    }

    return result;
}

From source file:adams.flow.transformer.WekaGetInstanceValue.java

License:Open Source License

/**
 * Executes the flow item.//from   ww w  .  j ava 2 s. com
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instance inst;
    int index;

    result = null;

    inst = (Instance) m_InputToken.getPayload();

    try {
        if (m_AttributeName.length() > 0) {
            index = inst.dataset().attribute(m_AttributeName).index();
        } else {
            m_Index.setMax(inst.numAttributes());
            index = m_Index.getIntIndex();
        }
        if (inst.isMissing(index)) {
            m_OutputToken = new Token("?");
        } else {
            switch (inst.attribute(index).type()) {
            case Attribute.NUMERIC:
                m_OutputToken = new Token(inst.value(index));
                break;

            case Attribute.DATE:
            case Attribute.NOMINAL:
            case Attribute.STRING:
            case Attribute.RELATIONAL:
                m_OutputToken = new Token(inst.stringValue(index));
                break;

            default:
                result = "Unhandled attribute type: " + inst.attribute(index).type();
            }
        }
    } catch (Exception e) {
        result = handleException("Failed to obtain value from instance:\n" + inst, e);
    }

    return result;
}

From source file:adams.flow.transformer.WekaInstanceBuffer.java

License:Open Source License

/**
 * Executes the flow item.//from ww w.  jav a  2 s  . c  o m
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instance[] insts;
    Instance inst;
    double[] values;
    int i;
    int n;
    boolean updated;

    result = null;

    if (m_Operation == Operation.INSTANCE_TO_INSTANCES) {
        if (m_InputToken.getPayload() instanceof Instance) {
            insts = new Instance[] { (Instance) m_InputToken.getPayload() };
        } else {
            insts = (Instance[]) m_InputToken.getPayload();
        }

        for (n = 0; n < insts.length; n++) {
            inst = insts[n];

            if ((m_Buffer != null) && m_CheckHeader) {
                if (!m_Buffer.equalHeaders(inst.dataset())) {
                    getLogger().info("Header changed, resetting buffer");
                    m_Buffer = null;
                }
            }

            // buffer instance
            if (m_Buffer == null)
                m_Buffer = new Instances(inst.dataset(), 0);

            // we need to make sure that string and relational values are in our
            // buffer header and update the current Instance accordingly before
            // buffering it
            values = inst.toDoubleArray();
            updated = false;
            for (i = 0; i < values.length; i++) {
                if (inst.isMissing(i))
                    continue;
                if (inst.attribute(i).isString()) {
                    values[i] = m_Buffer.attribute(i).addStringValue(inst.stringValue(i));
                    updated = true;
                } else if (inst.attribute(i).isRelationValued()) {
                    values[i] = m_Buffer.attribute(i).addRelation(inst.relationalValue(i));
                    updated = true;
                }
            }

            if (updated) {
                if (inst instanceof SparseInstance) {
                    inst = new SparseInstance(inst.weight(), values);
                } else if (inst instanceof BinarySparseInstance) {
                    inst = new BinarySparseInstance(inst.weight(), values);
                } else {
                    if (!(inst instanceof DenseInstance)) {
                        getLogger().severe("Unhandled instance class (" + inst.getClass().getName() + "), "
                                + "defaulting to " + DenseInstance.class.getName());
                    }
                    inst = new DenseInstance(inst.weight(), values);
                }
            } else {
                inst = (Instance) inst.copy();
            }

            m_Buffer.add(inst);
        }

        if (m_Buffer.numInstances() % m_Interval == 0) {
            m_OutputToken = new Token(m_Buffer);
            if (m_ClearBuffer)
                m_Buffer = null;
        }
    } else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) {
        m_Buffer = (Instances) m_InputToken.getPayload();
        m_Iterator = m_Buffer.iterator();
    } else {
        throw new IllegalStateException("Unhandled operation: " + m_Operation);
    }

    return result;
}