List of usage examples for weka.core Instance isMissing
public boolean isMissing(Attribute att);
From source file:WLSVM.java
License:Open Source License
/** * Converts an ARFF Instance into a string in the sparse format accepted by * LIBSVM/*from w w w . j a va2 s . c o m*/ * * @param instance * @return */ protected String InstanceToSparse(Instance instance) { String line = new String(); int c = (int) instance.classValue(); if (c == 0) c = -1; line = c + " "; for (int j = 1; j < instance.numAttributes(); j++) { if (j - 1 == instance.classIndex()) { continue; } if (instance.isMissing(j - 1)) continue; if (instance.value(j - 1) != 0) line += " " + j + ":" + instance.value(j - 1); } // System.out.println(line); return (line + "\n"); }
From source file:REPTree.java
License:Open Source License
/** * Builds classifier./* w w w .j a v a2 s . c o m*/ * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:REPRandomTree.java
License:Open Source License
/** * Builds classifier.// ww w. ja v a2 s. co m * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth, m_FeatureFrac, random); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:GainRatioAttributeEval1.java
License:Open Source License
/** * evaluates an individual attribute by measuring the gain ratio * of the class given the attribute.//from ww w. ja va 2 s.co m * * @param attribute the index of the attribute to be evaluated * @return the gain ratio * @throws Exception if the attribute could not be evaluated */ public double evaluateAttribute(int attribute) throws Exception { int i, j, ii, jj; int ni, nj; double sum = 0.0; ni = m_trainInstances.attribute(attribute).numValues() + 1; nj = m_numClasses + 1; double[] sumi, sumj; Instance inst; double temp = 0.0; sumi = new double[ni]; sumj = new double[nj]; double[][] counts = new double[ni][nj]; sumi = new double[ni]; sumj = new double[nj]; for (i = 0; i < ni; i++) { sumi[i] = 0.0; for (j = 0; j < nj; j++) { sumj[j] = 0.0; counts[i][j] = 0.0; } } // Fill the contingency table for (i = 0; i < m_numInstances; i++) { inst = m_trainInstances.instance(i); if (inst.isMissing(attribute)) { ii = ni - 1; } else { ii = (int) inst.value(attribute); } if (inst.isMissing(m_classIndex)) { jj = nj - 1; } else { jj = (int) inst.value(m_classIndex); } counts[ii][jj]++; } // get the row totals for (i = 0; i < ni; i++) { sumi[i] = 0.0; for (j = 0; j < nj; j++) { sumi[i] += counts[i][j]; sum += counts[i][j]; } } // get the column totals for (j = 0; j < nj; j++) { sumj[j] = 0.0; for (i = 0; i < ni; i++) { sumj[j] += counts[i][j]; } } // distribute missing counts if (m_missing_merge && (sumi[ni - 1] < m_numInstances) && (sumj[nj - 1] < m_numInstances)) { double[] i_copy = new double[sumi.length]; double[] j_copy = new double[sumj.length]; double[][] counts_copy = new double[sumi.length][sumj.length]; for (i = 0; i < ni; i++) { System.arraycopy(counts[i], 0, counts_copy[i], 0, sumj.length); } System.arraycopy(sumi, 0, i_copy, 0, sumi.length); System.arraycopy(sumj, 0, j_copy, 0, sumj.length); double total_missing = (sumi[ni - 1] + sumj[nj - 1] - counts[ni - 1][nj - 1]); // do the missing i's if (sumi[ni - 1] > 0.0) { for (j = 0; j < nj - 1; j++) { if (counts[ni - 1][j] > 0.0) { for (i = 0; i < ni - 1; i++) { temp = ((i_copy[i] / (sum - i_copy[ni - 1])) * counts[ni - 1][j]); counts[i][j] += temp; sumi[i] += temp; } counts[ni - 1][j] = 0.0; } } } sumi[ni - 1] = 0.0; // do the missing j's if (sumj[nj - 1] > 0.0) { for (i = 0; i < ni - 1; i++) { if (counts[i][nj - 1] > 0.0) { for (j = 0; j < nj - 1; j++) { temp = ((j_copy[j] / (sum - j_copy[nj - 1])) * counts[i][nj - 1]); counts[i][j] += temp; sumj[j] += temp; } counts[i][nj - 1] = 0.0; } } } sumj[nj - 1] = 0.0; // do the both missing if (counts[ni - 1][nj - 1] > 0.0 && total_missing != sum) { for (i = 0; i < ni - 1; i++) { for (j = 0; j < nj - 1; j++) { temp = (counts_copy[i][j] / (sum - total_missing)) * counts_copy[ni - 1][nj - 1]; counts[i][j] += temp; sumi[i] += temp; sumj[j] += temp; } } counts[ni - 1][nj - 1] = 0.0; } } return ContingencyTables.gainRatio(counts); }
From source file:ID3Chi.java
License:Open Source License
private double[] classifyInstanceWithToken(Instance instance, double token) { int numClasses = instance.numClasses(); double[] tokenDistribution = new double[numClasses]; if (m_Attribute == null) { for (int j = 0; j < numClasses; j++) { tokenDistribution[j] = token * m_Distribution[j]; }//w ww . ja va2 s. c o m } else { // for attribute values get token distribution if (instance.isMissing(m_Attribute)) { for (int j = 0; j < m_Attribute.numValues(); j++) { double[] dist = m_Successors[j].classifyInstanceWithToken(instance, token * m_Successors[j].m_Ratio); for (int i = 0; i < numClasses; i++) { tokenDistribution[i] += dist[i]; } } } else { int idx = (int) instance.value(m_Attribute); tokenDistribution = m_Successors[idx].classifyInstanceWithToken(instance, token * m_Successors[idx].m_Ratio); } } return tokenDistribution; }
From source file:ID3Chi.java
License:Open Source License
/** * Splits a dataset according to the values of a nominal attribute. * * @param data//from ww w . j av a 2 s .c o m * the data which is to be split * @param att * the attribute to be used for splitting * @return the sets of instances produced by the split */ private Instances[] splitData(Instances data, Attribute att) { // [att.numValues()] is location for "unknown" values Instances[] subset = new Instances[att.numValues() + 1]; for (int j = 0; j <= att.numValues(); j++) { subset[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); if (inst.isMissing(att)) { subset[att.numValues()].add(inst); } else { subset[(int) inst.value(att)].add(inst); } } for (int i = 0; i < subset.length; i++) { subset[i].compactify(); } return subset; }
From source file:adams.data.conversion.WekaInstancesToTimeseries.java
License:Open Source License
/** * Performs the actual conversion./*from w w w . jav a2 s.c o m*/ * * @return the converted data * @throws Exception if something goes wrong with the conversion */ @Override protected Object doConvert() throws Exception { Timeseries result; Instances input; Instance inst; int indexDate; int indexValue; TimeseriesPoint point; int i; Date timestamp; double value; input = (Instances) m_Input; // determine attribute indices m_DateAttribute.setData(input); indexDate = m_DateAttribute.getIntIndex(); if (indexDate == -1) throw new IllegalStateException("Failed to located date attribute: " + m_DateAttribute.getIndex()); m_ValueAttribute.setData(input); indexValue = m_ValueAttribute.getIntIndex(); if (indexValue == -1) throw new IllegalStateException("Failed to located value attribute: " + m_ValueAttribute.getIndex()); result = new Timeseries(input.relationName() + "-" + input.attribute(indexValue).name()); for (i = 0; i < input.numInstances(); i++) { inst = input.instance(i); if (!inst.isMissing(indexDate) && !inst.isMissing(indexValue)) { timestamp = new Date((long) inst.value(indexDate)); value = inst.value(indexValue); point = new TimeseriesPoint(timestamp, value); result.add(point); } } return result; }
From source file:adams.data.instances.InstanceComparator.java
License:Open Source License
/** * Compares its two arguments for order. Returns a negative integer, * zero, or a positive integer as the first argument is less than, equal * to, or greater than the second.// w w w . j av a2 s .c o m * * @param o1 the first object to be compared. * @param o2 the second object to be compared. * @return a negative integer, zero, or a positive integer as the * first argument is less than, equal to, or greater than the * second. */ @Override public int compare(Instance o1, Instance o2) { int result; Instances header; int i; int weight; double d1; double d2; result = 0; header = o1.dataset(); i = 0; while ((result == 0) && (i < m_Indices.length)) { if (o1.isMissing(m_Indices[i]) && o2.isMissing(m_Indices[i])) result = 0; else if (o1.isMissing(m_Indices[i])) result = -1; else if (o2.isMissing(m_Indices[i])) result = +1; else if (header.attribute(m_Indices[i]).isNumeric()) { d1 = o1.value(m_Indices[i]); d2 = o2.value(m_Indices[i]); if (d1 < d2) result = -1; else if (d1 == d2) result = 0; else result = +1; } else { result = o1.stringValue(m_Indices[i]).compareTo(o2.stringValue(m_Indices[i])); } if (!m_Ascending[i]) result = -result; // add weight to index weight = (int) Math.pow(10, (m_Indices.length - i)); result *= weight; i++; } return result; }
From source file:adams.flow.transformer.WekaGetInstanceValue.java
License:Open Source License
/** * Executes the flow item.//from ww w . j ava 2 s. com * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instance inst; int index; result = null; inst = (Instance) m_InputToken.getPayload(); try { if (m_AttributeName.length() > 0) { index = inst.dataset().attribute(m_AttributeName).index(); } else { m_Index.setMax(inst.numAttributes()); index = m_Index.getIntIndex(); } if (inst.isMissing(index)) { m_OutputToken = new Token("?"); } else { switch (inst.attribute(index).type()) { case Attribute.NUMERIC: m_OutputToken = new Token(inst.value(index)); break; case Attribute.DATE: case Attribute.NOMINAL: case Attribute.STRING: case Attribute.RELATIONAL: m_OutputToken = new Token(inst.stringValue(index)); break; default: result = "Unhandled attribute type: " + inst.attribute(index).type(); } } } catch (Exception e) { result = handleException("Failed to obtain value from instance:\n" + inst, e); } return result; }
From source file:adams.flow.transformer.WekaInstanceBuffer.java
License:Open Source License
/** * Executes the flow item.//from ww w. jav a 2 s . c o m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instance[] insts; Instance inst; double[] values; int i; int n; boolean updated; result = null; if (m_Operation == Operation.INSTANCE_TO_INSTANCES) { if (m_InputToken.getPayload() instanceof Instance) { insts = new Instance[] { (Instance) m_InputToken.getPayload() }; } else { insts = (Instance[]) m_InputToken.getPayload(); } for (n = 0; n < insts.length; n++) { inst = insts[n]; if ((m_Buffer != null) && m_CheckHeader) { if (!m_Buffer.equalHeaders(inst.dataset())) { getLogger().info("Header changed, resetting buffer"); m_Buffer = null; } } // buffer instance if (m_Buffer == null) m_Buffer = new Instances(inst.dataset(), 0); // we need to make sure that string and relational values are in our // buffer header and update the current Instance accordingly before // buffering it values = inst.toDoubleArray(); updated = false; for (i = 0; i < values.length; i++) { if (inst.isMissing(i)) continue; if (inst.attribute(i).isString()) { values[i] = m_Buffer.attribute(i).addStringValue(inst.stringValue(i)); updated = true; } else if (inst.attribute(i).isRelationValued()) { values[i] = m_Buffer.attribute(i).addRelation(inst.relationalValue(i)); updated = true; } } if (updated) { if (inst instanceof SparseInstance) { inst = new SparseInstance(inst.weight(), values); } else if (inst instanceof BinarySparseInstance) { inst = new BinarySparseInstance(inst.weight(), values); } else { if (!(inst instanceof DenseInstance)) { getLogger().severe("Unhandled instance class (" + inst.getClass().getName() + "), " + "defaulting to " + DenseInstance.class.getName()); } inst = new DenseInstance(inst.weight(), values); } } else { inst = (Instance) inst.copy(); } m_Buffer.add(inst); } if (m_Buffer.numInstances() % m_Interval == 0) { m_OutputToken = new Token(m_Buffer); if (m_ClearBuffer) m_Buffer = null; } } else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) { m_Buffer = (Instances) m_InputToken.getPayload(); m_Iterator = m_Buffer.iterator(); } else { throw new IllegalStateException("Unhandled operation: " + m_Operation); } return result; }