Example usage for weka.core Instance replaceMissingValues

List of usage examples for weka.core Instance replaceMissingValues

Introduction

On this page you can find example usages of weka.core.Instance.replaceMissingValues.

Prototype

public void replaceMissingValues(double[] array);

Source Link

Document

Replaces all missing values in the instance with the values contained in the given array.

Usage

From source file:com.yahoo.research.scoring.classifier.NutchOnlineClassifier.java

License:Apache License

/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}.
 *
 * <p>The instance is created as a {@link SparseInstance} with {@code dimension}
 * entries, its missing values are filled from the {@code replaceMissingValues}
 * array, and it is attached to the {@code instances} dataset. The class label,
 * the parent/sibling flags and the host are then set, and every distinct token
 * found in the path, query and fragment of the URI switches on its hashed
 * attribute.
 *
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}, or {@code null} if {@code url} is
 *         {@code null}.
 */
private static Instance convert(AnthURL url) {
    if (url == null) {
        System.out.println("Input AnthURL for convertion into instance was null.");
        return null;
    }

    Instance inst = new SparseInstance(dimension);
    inst.replaceMissingValues(replaceMissingValues);
    inst.setDataset(instances);

    inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
    inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
    inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
    inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
    // NOTE(review): "nonsempar" used to be assigned a second time at this point
    // with the identical value; that redundant call was dropped. Presumably a
    // "non-semantic sibling" feature was intended here instead - confirm against
    // the attribute definitions before adding it.
    inst.setValue(attributesIndex.get("domain"), url.uri.getHost());

    // A set is used so that a token occurring in several URI parts is only
    // hashed and set once.
    // NOTE(review): query/fragment may be absent for some URIs; this relies on
    // tokenizer(...) coping with such input - verify.
    Set<String> tokens = new HashSet<String>();
    tokens.addAll(tokenizer(url.uri.getPath()));
    tokens.addAll(tokenizer(url.uri.getQuery()));
    tokens.addAll(tokenizer(url.uri.getFragment()));

    for (String tok : tokens) {
        inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1);
    }
    return inst;
}

From source file:en_deep.mlprocess.manipulation.featmodif.FeatureModifierFilter.java

License:Open Source License

/**
 * Converts a single input instance to the output format and adds the result
 * to the end of the output queue.
 *
 * <p>Attributes outside {@code m_Columns} are copied to the next free output
 * position unchanged. Attributes inside the range are handed to
 * {@code getAttributeOutputValue}, which writes into {@code vals} /
 * {@code stringVals}; its return value is added to the current output
 * position. The new instance has the same weight and the same kind (sparse or
 * dense) as the input.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {

    double[] vals = new double[outputFormatPeek().numAttributes()];
    String[] stringVals = new String[vals.length];
    int attSoFar = 0;

    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
        Attribute att = instance.attribute(j);
        if (!m_Columns.isInRange(j)) {
            vals[attSoFar] = instance.value(j);
            attSoFar++;
        } else {
            // store new string values, make double values "missing" for now (if some string
            // values are missing, the double values will remain missing)
            attSoFar += getAttributeOutputValue(att, instance.value(j), vals, stringVals, attSoFar);
        }
    }

    Instance inst = (instance instanceof SparseInstance)
            ? new SparseInstance(instance.weight(), vals)
            : new DenseInstance(instance.weight(), vals);

    inst.setDataset(getOutputFormat());
    copyValues(inst, false, instance.dataset(), getOutputFormat());

    // add new string values to the output data set; the resulting indices are
    // written into vals so they can fill the still-missing slots below
    for (int i = 0; i < stringVals.length; ++i) {
        if (stringVals[i] != null) {
            vals[i] = inst.dataset().attribute(i).addStringValue(stringVals[i]);
        }
    }
    inst.replaceMissingValues(vals);

    // the dataset reference is set again before pushing, as in the original code
    inst.setDataset(getOutputFormat());
    push(inst);
}

From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java

License:Open Source License

/**
 * Copies the given instance, substitutes the replacement label
 * {@code m_ReplVal} for missing values in the selected columns, and adds the
 * copy to the end of the output queue.
 *
 * <p>Only nominal and string attributes inside {@code m_Columns} are treated;
 * missing values of any other attribute are left as they are.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {

    // duplicate the input, keeping its concrete kind (sparse or dense)
    Instance copy = (instance instanceof SparseInstance)
            ? new SparseInstance(instance.weight(), instance.toDoubleArray())
            : new DenseInstance(instance.weight(), instance.toDoubleArray());

    // carry over the existing string values; the dataset is assigned both
    // before and after copyValues - beware of weird behavior of that
    // function (see source)!!
    copy.setDataset(getOutputFormat());
    copyValues(copy, false, instance.dataset(), getOutputFormat());
    copy.setDataset(getOutputFormat());

    // separate value array that will hold the replacements for missing entries
    double[] filled = instance.toDoubleArray();

    for (int col = 0; col < getInputFormat().numAttributes(); col++) {

        Attribute inputAtt = instance.attribute(col);

        // nothing to do for unselected columns or values that are present
        if (!m_Columns.isInRange(col) || !instance.isMissing(col)) {
            continue;
        }

        if (inputAtt.isNominal()) {
            // look up the "missing" label in the output nominal attribute
            filled[col] = copy.dataset().attribute(col).indexOfValue(m_ReplVal);
        } else if (inputAtt.isString()) {
            // register the "missing" label as a new string value
            filled[col] = copy.dataset().attribute(col).addStringValue(m_ReplVal);
        }
    }

    // write the replacements into the copy and hand it on
    copy.replaceMissingValues(filled);

    push(copy);
}