Usage examples for weka.core.Instance#replaceMissingValues
public void replaceMissingValues(double[] array);
From source file:com.yahoo.research.scoring.classifier.NutchOnlineClassifier.java
License:Apache License
/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}.
 *
 * <p>The instance is created as a {@link SparseInstance} with {@code dimension}
 * attributes, its missing values are filled from the {@code replaceMissingValues}
 * array, and it is attached to the {@code instances} dataset. The class label,
 * the parent/sibling flags and the host are then set, followed by one attribute
 * per distinct hashed token of the URL's path, query and fragment.
 *
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}, or {@code null} (after printing a
 *         message to standard output) when {@code url} is {@code null}.
 */
private static Instance convert(AnthURL url) {
    if (url == null) {
        System.out.println("Input AnthURL for convertion into instance was null.");
        return null;
    }

    Instance inst = new SparseInstance(dimension);
    // Fill the missing values with the defaults before any feature is set explicitly.
    inst.replaceMissingValues(replaceMissingValues);
    inst.setDataset(instances);

    inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
    inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
    inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
    inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
    // NOTE(review): the original wrote "nonsempar" a second time at this point,
    // with the same value. That redundant write was dropped. Given the
    // sempar/nonsempar/semsib pattern, a non-semantic-sibling feature was
    // probably intended here - confirm against AnthURL and the attribute index.
    inst.setValue(attributesIndex.get("domain"), url.uri.getHost());

    // A set is used so that every distinct token contributes exactly once.
    Set<String> tokens = new HashSet<String>();
    tokens.addAll(tokenizer(url.uri.getPath()));
    tokens.addAll(tokenizer(url.uri.getQuery()));
    tokens.addAll(tokenizer(url.uri.getFragment()));

    for (String tok : tokens) {
        // Map the token to its hashed attribute and mark that attribute as present.
        inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1);
    }
    return inst;
}
From source file:en_deep.mlprocess.manipulation.featmodif.FeatureModifierFilter.java
License:Open Source License
/** * Convert a single instance over if the class is nominal. The converted * instance is added to the end of the output queue. * * @param instance the instance to convert *//*from w w w. ja v a 2 s .co m*/ private void convertInstance(Instance instance) { double[] vals = new double[outputFormatPeek().numAttributes()]; String[] stringVals = new String[vals.length]; int attSoFar = 0; for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = instance.attribute(j); if (!m_Columns.isInRange(j)) { vals[attSoFar] = instance.value(j); attSoFar++; } else { // store new string values, make double values "missing" for now (if some string // values are missing, the double values will remain missing) if (instance.value(0) == 12 && instance.value(1) == 9 && att.name().equals("sempos")) { attSoFar = attSoFar; } attSoFar += getAttributeOutputValue(att, instance.value(j), vals, stringVals, attSoFar); } } Instance inst = null; if (instance instanceof SparseInstance) { inst = new SparseInstance(instance.weight(), vals); } else { inst = new DenseInstance(instance.weight(), vals); } inst.setDataset(getOutputFormat()); copyValues(inst, false, instance.dataset(), getOutputFormat()); // add new string values to the output data set and to the instance for (int i = 0; i < stringVals.length; ++i) { if (stringVals[i] != null) { vals[i] = inst.dataset().attribute(i).addStringValue(stringVals[i]); } } inst.replaceMissingValues(vals); inst.setDataset(getOutputFormat()); push(inst); }
From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java
License:Open Source License
/** * Convert a single instance over if the class is nominal. The converted * instance is added to the end of the output queue. * * @param instance the instance to convert *///from w ww . j a v a 2 s .co m private void convertInstance(Instance instance) { // create a copy of the input instance Instance inst = null; if (instance instanceof SparseInstance) { inst = new SparseInstance(instance.weight(), instance.toDoubleArray()); } else { inst = new DenseInstance(instance.weight(), instance.toDoubleArray()); } // copy the string values from this instance as well (only the existing ones) inst.setDataset(getOutputFormat()); copyValues(inst, false, instance.dataset(), getOutputFormat()); // beware of weird behavior of this function (see source)!! inst.setDataset(getOutputFormat()); // find the missing values to be filled + the double values for the new "missing" label and store it double[] vals = instance.toDoubleArray(); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = instance.attribute(j); if (m_Columns.isInRange(j) && instance.isMissing(j)) { // find the "missing" value in the output nominal attribute if (att.isNominal()) { vals[j] = inst.dataset().attribute(j).indexOfValue(m_ReplVal); } // add a string value for the new "missing" label else if (att.isString()) { vals[j] = inst.dataset().attribute(j).addStringValue(m_ReplVal); } } } // fill in the missing values found inst.replaceMissingValues(vals); push(inst); }