Example usage for weka.core Instance isMissing

List of usage examples for weka.core Instance isMissing

Introduction

On this page you can find example usages of the isMissing method of weka.core.Instance.

Prototype

public boolean isMissing(Attribute att);

Source Link

Document

Tests if a specific value is "missing".

Usage

From source file:WLSVM.java

License:Open Source License

/**
 * Converts an ARFF Instance into a string in the sparse format accepted by
 * LIBSVM/*from  w  w w  .  j a va2 s  .  c o  m*/
 * 
 * @param instance
 * @return
 */
protected String InstanceToSparse(Instance instance) {
    String line = new String();
    int c = (int) instance.classValue();
    if (c == 0)
        c = -1;
    line = c + " ";
    for (int j = 1; j < instance.numAttributes(); j++) {
        if (j - 1 == instance.classIndex()) {
            continue;
        }
        if (instance.isMissing(j - 1))
            continue;
        if (instance.value(j - 1) != 0)
            line += " " + j + ":" + instance.value(j - 1);
    }
    // System.out.println(line); 
    return (line + "\n");
}

From source file:REPTree.java

License:Open Source License

/**
 * Builds classifier./* w  w w .j a  v a2 s . c  o m*/
 * 
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);

    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Randomize and stratify
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Split data into training and pruning set
    Instances train = null;
    Instances prune = null;
    if (!m_NoPruning) {
        train = data.trainCV(m_NumFolds, 0, random);
        prune = data.testCV(m_NumFolds, 0);
    } else {
        train = data;
    }

    // Create array of sorted indices and weights
    int[][][] sortedIndices = new int[1][train.numAttributes()][0];
    double[][][] weights = new double[1][train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
        if (j != train.classIndex()) {
            weights[0][j] = new double[train.numInstances()];
            if (train.attribute(j).isNominal()) {

                // Handling nominal attributes. Putting indices of
                // instances with missing values at the end.
                sortedIndices[0][j] = new int[train.numInstances()];
                int count = 0;
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (!inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
            } else {

                // Sorted indices are computed for numeric attributes
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    vals[i] = inst.value(j);
                }
                sortedIndices[0][j] = Utils.sort(vals);
                for (int i = 0; i < train.numInstances(); i++) {
                    weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight();
                }
            }
        }
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    double totalWeight = 0, totalSumSquared = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }
    m_Tree = new Tree();
    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Build tree
    m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum,
            m_MinVarianceProp * trainVariance, 0, m_MaxDepth);

    // Insert pruning data and perform reduced error pruning
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(prune);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}

From source file:REPRandomTree.java

License:Open Source License

/**
 * Builds classifier.
 *
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // Reject data this classifier cannot handle
    getCapabilities().testWithFail(data);

    // Work on a copy and drop instances without a class value
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random rng = new Random(m_Seed);

    // A single attribute leaves nothing to split on: use ZeroR instead
    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Shuffle, and stratify when the class is nominal
    data.randomize(rng);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Growing set and, unless pruning is switched off, the pruning set
    Instances growSet;
    Instances pruneSet = null;
    if (m_NoPruning) {
        growSet = data;
    } else {
        growSet = data.trainCV(m_NumFolds, 0, rng);
        pruneSet = data.testCV(m_NumFolds, 0);
    }

    // Per-attribute sorted instance indices and matching weights
    final int rowCount = growSet.numInstances();
    final int attCount = growSet.numAttributes();
    int[][][] order = new int[1][attCount][0];
    double[][][] orderWeights = new double[1][attCount][0];
    double[] buffer = new double[rowCount];
    for (int att = 0; att < attCount; att++) {
        if (att == growSet.classIndex()) {
            continue;
        }
        orderWeights[0][att] = new double[rowCount];

        if (growSet.attribute(att).isNominal()) {
            // Nominal attribute: first pass collects instances with a value,
            // second pass appends those where the value is missing.
            order[0][att] = new int[rowCount];
            int filled = 0;
            for (int pass = 0; pass < 2; pass++) {
                boolean collectMissing = (pass == 1);
                for (int row = 0; row < rowCount; row++) {
                    Instance current = growSet.instance(row);
                    if (current.isMissing(att) == collectMissing) {
                        order[0][att][filled] = row;
                        orderWeights[0][att][filled] = current.weight();
                        filled++;
                    }
                }
            }
        } else {
            // Numeric attribute: sort the instances by attribute value
            for (int row = 0; row < rowCount; row++) {
                buffer[row] = growSet.instance(row).value(att);
            }
            order[0][att] = Utils.sort(buffer);
            for (int row = 0; row < rowCount; row++) {
                orderWeights[0][att][row] = growSet.instance(order[0][att][row]).weight();
            }
        }
    }

    // Initial class statistics: weighted counts per class value (nominal) or
    // weighted sum / weighted sum of squares of the class value (numeric)
    final boolean nominalClass = data.classAttribute().isNominal();
    double[] classStats = new double[growSet.numClasses()];
    double weightSum = 0;
    double squaredSum = 0;
    for (int row = 0; row < rowCount; row++) {
        Instance current = growSet.instance(row);
        double w = current.weight();
        if (nominalClass) {
            classStats[(int) current.classValue()] += w;
        } else {
            double target = current.classValue();
            classStats[0] += target * w;
            squaredSum += target * target * w;
        }
        weightSum += w;
    }

    m_Tree = new Tree();
    double growVariance = 0;
    if (data.classAttribute().isNumeric()) {
        growVariance = m_Tree.singleVariance(classStats[0], squaredSum, weightSum) / weightSum;
        classStats[0] /= weightSum;
    }

    // Grow the tree
    m_Tree.buildTree(order, orderWeights, growSet, weightSum, classStats, new Instances(growSet, 0), m_MinNum,
            m_MinVarianceProp * growVariance, 0, m_MaxDepth, m_FeatureFrac, rng);

    // Reduced-error pruning on the hold-out set
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(pruneSet);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}

From source file:GainRatioAttributeEval1.java

License:Open Source License

/**
 * Evaluates an individual attribute by measuring the gain ratio
 * of the class given the attribute.
 * <p>
 * Builds an (attribute value x class value) contingency table with one extra
 * row and one extra column for missing values. When {@code m_missing_merge}
 * is set, the missing counts are redistributed over the other cells before
 * the gain ratio is computed.
 *
 * @param attribute the index of the attribute to be evaluated
 * @return the gain ratio
 * @throws Exception if the attribute could not be evaluated
 */
public double evaluateAttribute(int attribute) throws Exception {
    // The last row / column of the table collects the missing values
    final int numRows = m_trainInstances.attribute(attribute).numValues() + 1;
    final int numCols = m_numClasses + 1;
    final int missingRow = numRows - 1;
    final int missingCol = numCols - 1;

    // Freshly allocated arrays are already zero-filled
    double[][] counts = new double[numRows][numCols];
    double[] rowTotals = new double[numRows];
    double[] colTotals = new double[numCols];
    double total = 0.0;

    // Fill the contingency table
    for (int n = 0; n < m_numInstances; n++) {
        Instance current = m_trainInstances.instance(n);
        int row = current.isMissing(attribute) ? missingRow : (int) current.value(attribute);
        int col = current.isMissing(m_classIndex) ? missingCol : (int) current.value(m_classIndex);
        counts[row][col]++;
    }

    // Row totals, column totals and grand total
    for (int r = 0; r < numRows; r++) {
        for (int c = 0; c < numCols; c++) {
            rowTotals[r] += counts[r][c];
            colTotals[c] += counts[r][c];
            total += counts[r][c];
        }
    }

    // Distribute missing counts
    if (m_missing_merge && (rowTotals[missingRow] < m_numInstances) && (colTotals[missingCol] < m_numInstances)) {
        // Snapshots taken before anything is redistributed
        double[] rowSnapshot = rowTotals.clone();
        double[] colSnapshot = colTotals.clone();
        double[][] countSnapshot = new double[numRows][];
        for (int r = 0; r < numRows; r++) {
            countSnapshot[r] = counts[r].clone();
        }
        double totalMissing = (rowTotals[missingRow] + colTotals[missingCol] - counts[missingRow][missingCol]);

        // Missing attribute value, known class: spread over the rows in
        // proportion to the original row totals
        if (rowTotals[missingRow] > 0.0) {
            for (int c = 0; c < missingCol; c++) {
                if (counts[missingRow][c] > 0.0) {
                    for (int r = 0; r < missingRow; r++) {
                        double share = ((rowSnapshot[r] / (total - rowSnapshot[missingRow])) * counts[missingRow][c]);
                        counts[r][c] += share;
                        rowTotals[r] += share;
                    }

                    counts[missingRow][c] = 0.0;
                }
            }
        }

        rowTotals[missingRow] = 0.0;

        // Known attribute value, missing class: spread over the columns in
        // proportion to the original column totals
        if (colTotals[missingCol] > 0.0) {
            for (int r = 0; r < missingRow; r++) {
                if (counts[r][missingCol] > 0.0) {
                    for (int c = 0; c < missingCol; c++) {
                        double share = ((colSnapshot[c] / (total - colSnapshot[missingCol])) * counts[r][missingCol]);
                        counts[r][c] += share;
                        colTotals[c] += share;
                    }

                    counts[r][missingCol] = 0.0;
                }
            }
        }

        colTotals[missingCol] = 0.0;

        // Both missing: spread over all cells in proportion to the original counts
        if (counts[missingRow][missingCol] > 0.0 && totalMissing != total) {
            for (int r = 0; r < missingRow; r++) {
                for (int c = 0; c < missingCol; c++) {
                    double share = (countSnapshot[r][c] / (total - totalMissing)) * countSnapshot[missingRow][missingCol];
                    counts[r][c] += share;
                    rowTotals[r] += share;
                    colTotals[c] += share;
                }
            }

            counts[missingRow][missingCol] = 0.0;
        }
    }

    return ContingencyTables.gainRatio(counts);
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Computes the class distribution for an instance, scaled by the given token.
 *
 * @param instance the instance to classify
 * @param token the weight passed down to this node
 * @return one entry per class value
 */
private double[] classifyInstanceWithToken(Instance instance, double token) {
    final int classCount = instance.numClasses();

    // No split attribute at this node: scale the stored distribution
    if (m_Attribute == null) {
        double[] scaled = new double[classCount];
        for (int c = 0; c < classCount; c++) {
            scaled[c] = token * m_Distribution[c];
        }
        return scaled;
    }

    // Value is known: follow the matching successor only
    if (!instance.isMissing(m_Attribute)) {
        int branch = (int) instance.value(m_Attribute);
        return m_Successors[branch].classifyInstanceWithToken(instance,
                token * m_Successors[branch].m_Ratio);
    }

    // Value is missing: add up the results of every successor, each one
    // receiving the token multiplied by its own m_Ratio
    double[] combined = new double[classCount];
    for (int branch = 0; branch < m_Attribute.numValues(); branch++) {
        double[] partial = m_Successors[branch].classifyInstanceWithToken(instance,
                token * m_Successors[branch].m_Ratio);
        for (int c = 0; c < classCount; c++) {
            combined[c] += partial[c];
        }
    }
    return combined;
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Splits a dataset according to the values of a nominal attribute.
 * The returned array has one subset per attribute value plus a final
 * subset holding the instances where the attribute is missing.
 *
 * @param data
 *            the data which is to be split
 * @param att
 *            the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {

    final int valueCount = att.numValues();
    // the slot after the last attribute value collects the "unknown" values
    final int unknownSlot = valueCount;

    Instances[] partitions = new Instances[valueCount + 1];
    int slot = 0;
    while (slot < partitions.length) {
        partitions[slot] = new Instances(data, data.numInstances());
        slot++;
    }

    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance current = (Instance) rows.nextElement();
        int target = current.isMissing(att) ? unknownSlot : (int) current.value(att);
        partitions[target].add(current);
    }

    for (Instances partition : partitions) {
        partition.compactify();
    }
    return partitions;
}

From source file:adams.data.conversion.WekaInstancesToTimeseries.java

License:Open Source License

/**
 * Performs the actual conversion./*from w  w w  . jav  a2  s.c o m*/
 *
 * @return      the converted data
 * @throws Exception   if something goes wrong with the conversion
 */
@Override
protected Object doConvert() throws Exception {
    Timeseries result;
    Instances input;
    Instance inst;
    int indexDate;
    int indexValue;
    TimeseriesPoint point;
    int i;
    Date timestamp;
    double value;

    input = (Instances) m_Input;

    // determine attribute indices
    m_DateAttribute.setData(input);
    indexDate = m_DateAttribute.getIntIndex();
    if (indexDate == -1)
        throw new IllegalStateException("Failed to located date attribute: " + m_DateAttribute.getIndex());
    m_ValueAttribute.setData(input);
    indexValue = m_ValueAttribute.getIntIndex();
    if (indexValue == -1)
        throw new IllegalStateException("Failed to located value attribute: " + m_ValueAttribute.getIndex());

    result = new Timeseries(input.relationName() + "-" + input.attribute(indexValue).name());
    for (i = 0; i < input.numInstances(); i++) {
        inst = input.instance(i);
        if (!inst.isMissing(indexDate) && !inst.isMissing(indexValue)) {
            timestamp = new Date((long) inst.value(indexDate));
            value = inst.value(indexValue);
            point = new TimeseriesPoint(timestamp, value);
            result.add(point);
        }
    }

    return result;
}

From source file:adams.data.instances.InstanceComparator.java

License:Open Source License

/**
 * Compares its two arguments for order.  Returns a negative integer,
 * zero, or a positive integer as the first argument is less than, equal
 * to, or greater than the second.
 * <p>
 * The attributes listed in {@code m_Indices} are compared one after the
 * other until the first difference is found. A missing value sorts before
 * a present one, and two missing values are considered equal. The sign is
 * inverted for indices whose {@code m_Ascending} flag is false. The
 * magnitude of a non-zero result is a power of ten that grows the earlier
 * the deciding index appears in {@code m_Indices}.
 *
 * @param o1 the first object to be compared.
 * @param o2 the second object to be compared.
 * @return a negative integer, zero, or a positive integer as the
 *           first argument is less than, equal to, or greater than the
 *          second.
 */
@Override
public int compare(Instance o1, Instance o2) {
    int result;
    Instances header;
    int i;
    int weight;
    double d1;
    double d2;

    result = 0;
    header = o1.dataset();
    i = 0;
    while ((result == 0) && (i < m_Indices.length)) {
        if (o1.isMissing(m_Indices[i]) && o2.isMissing(m_Indices[i]))
            result = 0;
        else if (o1.isMissing(m_Indices[i]))
            result = -1;
        else if (o2.isMissing(m_Indices[i]))
            result = +1;
        else if (header.attribute(m_Indices[i]).isNumeric()) {
            d1 = o1.value(m_Indices[i]);
            d2 = o2.value(m_Indices[i]);
            if (d1 < d2)
                result = -1;
            else if (d1 == d2)
                result = 0;
            else
                result = +1;
        } else {
            // String.compareTo may return any magnitude; keep only the sign so
            // that the multiplication below cannot overflow and flip the sign.
            result = Integer.signum(o1.stringValue(m_Indices[i]).compareTo(o2.stringValue(m_Indices[i])));
        }

        if (!m_Ascending[i])
            result = -result;

        // add weight to index
        // (the cast caps the weight at Integer.MAX_VALUE; with result limited
        // to -1/0/+1 the product always fits into an int)
        weight = (int) Math.pow(10, (m_Indices.length - i));
        result *= weight;

        i++;
    }

    return result;
}

From source file:adams.flow.transformer.WekaGetInstanceValue.java

License:Open Source License

/**
 * Executes the flow item.
 * <p>
 * Looks up a single attribute of the incoming instance, either by name or
 * by index, and stores its value in the output token. A missing value is
 * output as "?".
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String error = null;
    final Instance current = (Instance) m_InputToken.getPayload();

    try {
        // an attribute name, when given, takes precedence over the index
        final int pos;
        if (m_AttributeName.length() > 0) {
            pos = current.dataset().attribute(m_AttributeName).index();
        } else {
            m_Index.setMax(current.numAttributes());
            pos = m_Index.getIntIndex();
        }

        if (current.isMissing(pos)) {
            m_OutputToken = new Token("?");
        } else {
            final int type = current.attribute(pos).type();
            if (type == Attribute.NUMERIC) {
                // numeric attributes are forwarded as numbers
                m_OutputToken = new Token(current.value(pos));
            } else if ((type == Attribute.DATE) || (type == Attribute.NOMINAL) || (type == Attribute.STRING)
                    || (type == Attribute.RELATIONAL)) {
                // all other known types are forwarded as strings
                m_OutputToken = new Token(current.stringValue(pos));
            } else {
                error = "Unhandled attribute type: " + type;
            }
        }
    } catch (Exception e) {
        error = handleException("Failed to obtain value from instance:\n" + current, e);
    }

    return error;
}

From source file:adams.flow.transformer.WekaInstanceBuffer.java

License:Open Source License

/**
 * Executes the flow item.
 * <p>
 * For INSTANCE_TO_INSTANCES the incoming instance(s) are appended to the
 * internal buffer, which is output whenever its size is a multiple of
 * {@code m_Interval}. For INSTANCES_TO_INSTANCE the incoming dataset becomes
 * the buffer and an iterator over it is set up.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    if (m_Operation == Operation.INSTANCES_TO_INSTANCE) {
        m_Buffer = (Instances) m_InputToken.getPayload();
        m_Iterator = m_Buffer.iterator();
        return null;
    }
    if (m_Operation != Operation.INSTANCE_TO_INSTANCES) {
        throw new IllegalStateException("Unhandled operation: " + m_Operation);
    }

    // the payload is either a single instance or an array of instances
    Instance[] incoming = (m_InputToken.getPayload() instanceof Instance)
            ? new Instance[] { (Instance) m_InputToken.getPayload() }
            : (Instance[]) m_InputToken.getPayload();

    for (Instance source : incoming) {
        // start over when header checking is on and the header differs
        if ((m_Buffer != null) && m_CheckHeader && !m_Buffer.equalHeaders(source.dataset())) {
            getLogger().info("Header changed, resetting buffer");
            m_Buffer = null;
        }

        // buffer instance
        if (m_Buffer == null) {
            m_Buffer = new Instances(source.dataset(), 0);
        }

        // String and relational values have to be registered with the buffer's
        // header; the instance's cell values are replaced by whatever the
        // buffer's attributes return for them.
        double[] cells = source.toDoubleArray();
        boolean remapped = false;
        for (int col = 0; col < cells.length; col++) {
            if (source.isMissing(col)) {
                continue;
            }
            if (source.attribute(col).isString()) {
                cells[col] = m_Buffer.attribute(col).addStringValue(source.stringValue(col));
                remapped = true;
            } else if (source.attribute(col).isRelationValued()) {
                cells[col] = m_Buffer.attribute(col).addRelation(source.relationalValue(col));
                remapped = true;
            }
        }

        Instance toBuffer;
        if (!remapped) {
            toBuffer = (Instance) source.copy();
        } else if (source instanceof SparseInstance) {
            toBuffer = new SparseInstance(source.weight(), cells);
        } else if (source instanceof BinarySparseInstance) {
            // NOTE(review): if BinarySparseInstance is a subclass of
            // SparseInstance, this branch can never be reached - confirm intent.
            toBuffer = new BinarySparseInstance(source.weight(), cells);
        } else {
            if (!(source instanceof DenseInstance)) {
                getLogger().severe("Unhandled instance class (" + source.getClass().getName() + "), "
                        + "defaulting to " + DenseInstance.class.getName());
            }
            toBuffer = new DenseInstance(source.weight(), cells);
        }

        m_Buffer.add(toBuffer);
    }

    // only output the buffer when it has reached a multiple of the interval
    if (m_Buffer.numInstances() % m_Interval == 0) {
        m_OutputToken = new Token(m_Buffer);
        if (m_ClearBuffer) {
            m_Buffer = null;
        }
    }

    return null;
}