List of usage examples for weka.core.Instance.isMissing
public boolean isMissing(Attribute att);
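Before the longer examples below, a minimal sketch of the call itself. The dataset path and attribute index are placeholders, not taken from any example on this page:

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class IsMissingDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path to any ARFF/CSV file Weka can load.
        Instances data = DataSource.read("data.arff");
        Attribute att = data.attribute(0);
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            // isMissing also has an int-index overload: inst.isMissing(0)
            if (inst.isMissing(att)) {
                System.out.println("Instance " + i + " is missing " + att.name());
            }
        }
    }
}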
From source file:machine_learing_clasifier.MyC45.java
@Override
public void buildClassifier(Instances i) throws Exception {
    if (!i.classAttribute().isNominal()) {
        throw new Exception("Class not nominal");
    }
    // Handle missing values by imputing a fill value per attribute
    for (int j = 0; j < i.numAttributes(); j++) {
        Attribute attr = i.attribute(j);
        for (int k = 0; k < i.numInstances(); k++) {
            Instance inst = i.instance(k);
            if (inst.isMissing(attr)) {
                inst.setValue(attr, fillMissingValue(i, attr)); // performance could be tuned further
            }
        }
    }
    i = new Instances(i);
    i.deleteWithMissingClass();
    makeTree(i);
}
From source file:machine_learing_clasifier.MyID3.java
@Override
public void buildClassifier(Instances i) throws Exception {
    if (!i.classAttribute().isNominal()) {
        throw new Exception("Class not nominal");
    }
    for (int j = 0; j < i.numAttributes(); j++) {
        Attribute attr = i.attribute(j);
        if (!attr.isNominal()) {
            throw new Exception("Attribute not nominal");
        }
        for (int k = 0; k < i.numInstances(); k++) {
            Instance inst = i.instance(k);
            if (inst.isMissing(attr)) {
                throw new Exception("Missing value");
            }
        }
    }
    i = new Instances(i);
    i.deleteWithMissingClass();
    makeTree(i);
}
From source file:main.NaiveBayes.java
License:Open Source License
/**
 * Generates the classifier.
 *
 * @param instances set of instances serving as training data
 * @exception Exception if the classifier has not been generated successfully
 */
@Override
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    m_NumClasses = instances.numClasses();

    // Copy the instances
    m_Instances = new Instances(instances);

    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }

    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);

    int attIndex = 0;
    Enumeration<Attribute> enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = enu.nextElement();

        // If the attribute is numeric, determine the estimator
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }

        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch (attribute.type()) {
            case Attribute.NUMERIC:
                if (m_UseKernelEstimator) {
                    m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                } else {
                    m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                }
                break;
            case Attribute.NOMINAL:
                m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                break;
            default:
                throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }

    // Compute counts
    Enumeration<Instance> enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = enumInsts.nextElement();
        updateClassifier(instance);
    }

    // Save space
    m_Instances = new Instances(m_Instances, 0);
}
From source file:main.NaiveBayes.java
License:Open Source License
/**
 * Updates the classifier with the given instance.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in the model.
 */
public void updateClassifier(Instance instance) throws Exception {
    if (!instance.classIsMissing()) {
        Enumeration<Attribute> enumAtts = m_Instances.enumerateAttributes();
        int attIndex = 0;
        while (enumAtts.hasMoreElements()) {
            Attribute attribute = enumAtts.nextElement();
            if (!instance.isMissing(attribute)) {
                m_Distributions[attIndex][(int) instance.classValue()].addValue(instance.value(attribute),
                        instance.weight());
            }
            attIndex++;
        }
        m_ClassDistribution.addValue(instance.classValue(), instance.weight());
    }
}
From source file:main.NaiveBayes.java
License:Open Source License
/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if there is a problem generating the prediction
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    if (m_UseDiscretization) {
        m_Disc.input(instance);
        instance = m_Disc.output();
    }
    double[] probs = new double[m_NumClasses];
    for (int j = 0; j < m_NumClasses; j++) {
        probs[j] = m_ClassDistribution.getProbability(j);
    }
    Enumeration<Attribute> enumAtts = instance.enumerateAttributes();
    int attIndex = 0;
    while (enumAtts.hasMoreElements()) {
        Attribute attribute = enumAtts.nextElement();
        if (!instance.isMissing(attribute)) {
            double temp, max = 0;
            for (int j = 0; j < m_NumClasses; j++) {
                temp = Math.max(1e-75, Math.pow(
                        m_Distributions[attIndex][j].getProbability(instance.value(attribute)),
                        m_Instances.attribute(attIndex).weight()));
                probs[j] *= temp;
                if (probs[j] > max) {
                    max = probs[j];
                }
                if (Double.isNaN(probs[j])) {
                    throw new Exception("NaN returned from estimator for attribute " + attribute.name() + ":\n"
                            + m_Distributions[attIndex][j].toString());
                }
            }
            if ((max > 0) && (max < 1e-75)) { // Danger of probability underflow
                for (int j = 0; j < m_NumClasses; j++) {
                    probs[j] *= 1e75;
                }
            }
        }
        attIndex++;
    }

    // Display probabilities
    Utils.normalize(probs);
    return probs;
}
From source file:meka.core.MLUtils.java
License:Open Source License
/**
 * ToIntArray - raw instance to int[] representation
 */
public static final int[] toIntArray(Instance x, int L) {
    int y[] = new int[L];
    for (int j = 0; j < L; j++) {
        // added the following if-statement to change missing values to -1
        if (x.isMissing(j)) {
            y[j] = -1;
        } else {
            y[j] = (int) Math.round(x.value(j));
        }
    }
    return y;
}
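For context, a minimal usage sketch of the helper above. It assumes a MEKA-style dataset whose first L attributes are the binary labels, with the label count stored as the class index; both conventions are assumptions here, not shown in the example:

// data: weka.core.Instances loaded from a MEKA multi-label ARFF.
int L = data.classIndex(); // MEKA keeps the number of labels in classIndex()
int[] labels = MLUtils.toIntArray(data.instance(0), L);
// labels[j] is the j-th label value rounded to an int, or -1 where it was missing.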
From source file:meka.experiment.statisticsexporters.WekaFilter.java
License:Open Source License
/**
 * Converts the Instances back into statistics.
 *
 * @param data the data to convert
 * @return the generated statistics
 */
protected List<EvaluationStatistics> fromInstances(Instances data) {
    List<EvaluationStatistics> result;
    EvaluationStatistics stat;
    MultiLabelClassifier cls;
    String rel;
    int i;
    int n;
    Instance inst;

    result = new ArrayList<>();

    if (data.attribute(EvaluationStatistics.KEY_CLASSIFIER) == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_CLASSIFIER);
        return result;
    }
    if (data.attribute(EvaluationStatistics.KEY_RELATION) == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_RELATION);
        return result;
    }

    for (i = 0; i < data.numInstances(); i++) {
        inst = data.instance(i);
        try {
            cls = OptionUtils.fromCommandLine(MultiLabelClassifier.class,
                    inst.stringValue(data.attribute(EvaluationStatistics.KEY_CLASSIFIER)));
            rel = inst.stringValue(data.attribute(EvaluationStatistics.KEY_RELATION));
            stat = new EvaluationStatistics(cls, rel, null);
            for (n = 0; n < inst.numAttributes(); n++) {
                if (inst.attribute(n).isNumeric() && !inst.isMissing(n)) {
                    stat.put(inst.attribute(n).name(), inst.value(n));
                }
            }
            result.add(stat);
        } catch (Exception e) {
            handleException("Failed to process instance: " + inst, e);
        }
    }

    return result;
}
From source file:milk.classifiers.MINND.java
License:Open Source License
/**
 * Calculates the distance between an instance and an exemplar summarized
 * by per-attribute means and variances.
 *
 * @param first the instance
 * @param mean the attribute means of the exemplar
 * @param var the attribute variances of the exemplar
 * @param pos the position index of the exemplar
 * @return the distance between the instance and the exemplar
 */
private double distance(Instance first, double[] mean, double[] var, int pos) {
    double diff, distance = 0;
    int j = 0;
    for (int i = 0; i < first.numAttributes(); i++) {
        // Skip nominal attributes (incl. class & ID)
        if ((i == m_ClassIndex) || (i == m_IdIndex))
            continue;
        // If attribute is numeric
        if (first.attribute(i).isNumeric()) {
            if (!first.isMissing(i)) {
                diff = first.value(i) - mean[j];
                if (Utils.gr(var[j], m_ZERO))
                    distance += m_Change[pos][j] * var[j] * diff * diff;
                else
                    distance += m_Change[pos][j] * diff * diff;
            } else {
                if (Utils.gr(var[j], m_ZERO))
                    distance += m_Change[pos][j] * var[j];
                else
                    distance += m_Change[pos][j] * 1.0;
            }
        }
        j++;
    }
    return distance;
}
From source file:milk.classifiers.MINND.java
License:Open Source License
/**
 * Updates the minimum and maximum values for all the attributes
 * based on a new exemplar.
 *
 * @param ex the new exemplar
 */
private void updateMinMax(Exemplar ex) {
    Instances insts = ex.getInstances();
    int m = 0;
    for (int j = 0; j < insts.numAttributes(); j++) {
        if ((j != ex.idIndex()) && (j != ex.classIndex())) {
            if (insts.attribute(j).isNumeric()) {
                for (int k = 0; k < insts.numInstances(); k++) {
                    Instance ins = insts.instance(k);
                    if (!ins.isMissing(j)) {
                        if (Double.isNaN(m_MinArray[m])) {
                            m_MinArray[m] = ins.value(j);
                            m_MaxArray[m] = ins.value(j);
                        } else {
                            if (ins.value(j) < m_MinArray[m])
                                m_MinArray[m] = ins.value(j);
                            else if (ins.value(j) > m_MaxArray[m])
                                m_MaxArray[m] = ins.value(j);
                        }
                    }
                }
            }
            m++;
        }
    }
}
From source file:milk.classifiers.TLD.java
License:Open Source License
/**
 * @param exs the training exemplars
 * @exception Exception if the model cannot be built properly
 */
public void buildClassifier(Exemplars exs) throws Exception {
    m_ClassIndex = exs.classIndex();
    m_IdIndex = exs.idIndex();
    int numegs = exs.numExemplars();
    m_Dimension = exs.numAttributes() - 2;
    Exemplars pos = new Exemplars(exs, 0), neg = new Exemplars(exs, 0);

    for (int u = 0; u < numegs; u++) {
        Exemplar example = exs.exemplar(u);
        if (example.classValue() == 0)
            pos.add(example);
        else
            neg.add(example);
    }

    int pnum = pos.numExemplars(), nnum = neg.numExemplars();

    m_MeanP = new double[pnum][m_Dimension];
    m_VarianceP = new double[pnum][m_Dimension];
    m_SumP = new double[pnum][m_Dimension];
    m_MeanN = new double[nnum][m_Dimension];
    m_VarianceN = new double[nnum][m_Dimension];
    m_SumN = new double[nnum][m_Dimension];
    m_ParamsP = new double[4 * m_Dimension];
    m_ParamsN = new double[4 * m_Dimension];

    // Estimation of the parameters: as the start value for search
    double[] pSumVal = new double[m_Dimension], // for m
            nSumVal = new double[m_Dimension];
    double[] maxVarsP = new double[m_Dimension], // for a
            maxVarsN = new double[m_Dimension];
    // Mean of sample variances: for b, b=a/E(\sigma^2)+2
    double[] varMeanP = new double[m_Dimension], varMeanN = new double[m_Dimension];
    // Variances of sample means: for w, w=E[var(\mu)]/E[\sigma^2]
    double[] meanVarP = new double[m_Dimension], meanVarN = new double[m_Dimension];
    // number of exemplars without all values missing
    double[] numExsP = new double[m_Dimension], numExsN = new double[m_Dimension];

    // Extract metadata from both positive and negative bags
    for (int v = 0; v < pnum; v++) {
        Exemplar px = pos.exemplar(v);
        m_MeanP[v] = px.meanOrMode();
        m_VarianceP[v] = px.variance();
        Instances pxi = px.getInstances();

        for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
            if ((t == m_ClassIndex) || (t == m_IdIndex))
                t++;

            if (!Double.isNaN(m_MeanP[v][w])) {
                for (int u = 0; u < pxi.numInstances(); u++) {
                    Instance ins = pxi.instance(u);
                    if (!ins.isMissing(t))
                        m_SumP[v][w] += ins.weight();
                }
                numExsP[w]++;
                pSumVal[w] += m_MeanP[v][w];
                meanVarP[w] += m_MeanP[v][w] * m_MeanP[v][w];
                if (maxVarsP[w] < m_VarianceP[v][w])
                    maxVarsP[w] = m_VarianceP[v][w];
                varMeanP[w] += m_VarianceP[v][w];
                m_VarianceP[v][w] *= (m_SumP[v][w] - 1.0);
                if (m_VarianceP[v][w] < 0.0)
                    m_VarianceP[v][w] = 0.0;
            }
        }
    }

    for (int v = 0; v < nnum; v++) {
        Exemplar nx = neg.exemplar(v);
        m_MeanN[v] = nx.meanOrMode();
        m_VarianceN[v] = nx.variance();
        Instances nxi = nx.getInstances();

        for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
            if ((t == m_ClassIndex) || (t == m_IdIndex))
                t++;

            if (!Double.isNaN(m_MeanN[v][w])) {
                for (int u = 0; u < nxi.numInstances(); u++)
                    if (!nxi.instance(u).isMissing(t))
                        m_SumN[v][w] += nxi.instance(u).weight();
                numExsN[w]++;
                nSumVal[w] += m_MeanN[v][w];
                meanVarN[w] += m_MeanN[v][w] * m_MeanN[v][w];
                if (maxVarsN[w] < m_VarianceN[v][w])
                    maxVarsN[w] = m_VarianceN[v][w];
                varMeanN[w] += m_VarianceN[v][w];
                m_VarianceN[v][w] *= (m_SumN[v][w] - 1.0);
                if (m_VarianceN[v][w] < 0.0)
                    m_VarianceN[v][w] = 0.0;
            }
        }
    }

    for (int w = 0; w < m_Dimension; w++) {
        pSumVal[w] /= numExsP[w];
        nSumVal[w] /= numExsN[w];
        if (numExsP[w] > 1)
            meanVarP[w] = meanVarP[w] / (numExsP[w] - 1.0) - pSumVal[w] * numExsP[w] / (numExsP[w] - 1.0);
        if (numExsN[w] > 1)
            meanVarN[w] = meanVarN[w] / (numExsN[w] - 1.0) - nSumVal[w] * numExsN[w] / (numExsN[w] - 1.0);
        varMeanP[w] /= numExsP[w];
        varMeanN[w] /= numExsN[w];
    }

    // Bounds and parameter values for each run
    double[][] bounds = new double[2][4];
    double[] pThisParam = new double[4], nThisParam = new double[4];

    // Initial values for parameters
    double a, b, w, m;

    // Optimize for one dimension
    for (int x = 0; x < m_Dimension; x++) {
        System.err.println("\n\n!!!!!!!!!!!!!!!!!!!!!!???Dimension #" + x);

        // Positive exemplars: first run
        a = (maxVarsP[x] > ZERO) ? maxVarsP[x] : 1.0;
        b = a / varMeanP[x] + 2.0; // a/(b-2) = E(\sigma^2)
        w = meanVarP[x] / varMeanP[x]; // E[var(\mu)] = w*E[\sigma^2]
        if (w <= ZERO)
            w = 1.0;
        m = pSumVal[x];
        pThisParam[0] = a; // a
        pThisParam[1] = b; // b
        pThisParam[2] = w; // w
        pThisParam[3] = m; // m

        // Negative exemplars: first run
        a = (maxVarsN[x] > ZERO) ? maxVarsN[x] : 1.0;
        b = a / varMeanN[x] + 2.0; // a/(b-2) = E(\sigma^2)
        w = meanVarN[x] / varMeanN[x]; // E[var(\mu)] = w*E[\sigma^2]
        if (w <= ZERO)
            w = 1.0;
        m = nSumVal[x];
        nThisParam[0] = a; // a
        nThisParam[1] = b; // b
        nThisParam[2] = w; // w
        nThisParam[3] = m; // m

        // Bound constraints
        bounds[0][0] = ZERO; // a > 0
        bounds[0][1] = 2.0 + ZERO; // b > 2
        bounds[0][2] = ZERO; // w > 0
        bounds[0][3] = Double.NaN;

        for (int t = 0; t < 4; t++) {
            bounds[1][t] = Double.NaN;
            m_ParamsP[4 * x + t] = pThisParam[t];
            m_ParamsN[4 * x + t] = nThisParam[t];
        }

        double pminVal = Double.MAX_VALUE, nminVal = Double.MAX_VALUE;
        Random whichEx = new Random(m_Seed);
        TLD_Optm pOp = null, nOp = null;
        boolean isRunValid = true;
        double[] sumP = new double[pnum], meanP = new double[pnum], varP = new double[pnum];
        double[] sumN = new double[nnum], meanN = new double[nnum], varN = new double[nnum];

        // One dimension
        for (int p = 0; p < pnum; p++) {
            sumP[p] = m_SumP[p][x];
            meanP[p] = m_MeanP[p][x];
            varP[p] = m_VarianceP[p][x];
        }
        for (int q = 0; q < nnum; q++) {
            sumN[q] = m_SumN[q][x];
            meanN[q] = m_MeanN[q][x];
            varN[q] = m_VarianceN[q][x];
        }

        for (int y = 0; y < m_Run;) {
            System.err.println("\n\n!!!!!!!!!!!!!!!!!!!!!!???Run #" + y);
            double thisMin;

            System.err.println("\nPositive exemplars");
            pOp = new TLD_Optm();
            pOp.setNum(sumP);
            pOp.setSSquare(varP);
            pOp.setXBar(meanP);

            pThisParam = pOp.findArgmin(pThisParam, bounds);
            while (pThisParam == null) {
                pThisParam = pOp.getVarbValues();
                System.err.println("!!! 200 iterations finished, not enough!");
                pThisParam = pOp.findArgmin(pThisParam, bounds);
            }

            thisMin = pOp.getMinFunction();
            if (!Double.isNaN(thisMin) && (thisMin < pminVal)) {
                pminVal = thisMin;
                for (int z = 0; z < 4; z++)
                    m_ParamsP[4 * x + z] = pThisParam[z];
            }

            if (Double.isNaN(thisMin)) {
                pThisParam = new double[4];
                isRunValid = false;
            }

            System.err.println("\nNegative exemplars");
            nOp = new TLD_Optm();
            nOp.setNum(sumN);
            nOp.setSSquare(varN);
            nOp.setXBar(meanN);

            nThisParam = nOp.findArgmin(nThisParam, bounds);
            while (nThisParam == null) {
                nThisParam = nOp.getVarbValues();
                System.err.println("!!! 200 iterations finished, not enough!");
                nThisParam = nOp.findArgmin(nThisParam, bounds);
            }

            thisMin = nOp.getMinFunction();
            if (!Double.isNaN(thisMin) && (thisMin < nminVal)) {
                nminVal = thisMin;
                for (int z = 0; z < 4; z++)
                    m_ParamsN[4 * x + z] = nThisParam[z];
            }

            if (Double.isNaN(thisMin)) {
                nThisParam = new double[4];
                isRunValid = false;
            }

            if (!isRunValid) {
                y--;
                isRunValid = true;
            }

            if (++y < m_Run) {
                // Change the initial parameters and restart
                int pone = whichEx.nextInt(pnum), // Randomly pick one pos. exemplar
                        none = whichEx.nextInt(nnum);

                // Positive exemplars: next run
                while ((m_SumP[pone][x] <= 1.0) || Double.isNaN(m_MeanP[pone][x]))
                    pone = whichEx.nextInt(pnum);

                a = m_VarianceP[pone][x] / (m_SumP[pone][x] - 1.0);
                if (a <= ZERO)
                    a = m_ParamsN[4 * x]; // Change to negative params
                m = m_MeanP[pone][x];
                double sq = (m - m_ParamsP[4 * x + 3]) * (m - m_ParamsP[4 * x + 3]);

                b = a * m_ParamsP[4 * x + 2] / sq + 2.0; // b=a/Var+2, assuming Var=Sq/w'
                if ((b <= ZERO) || Double.isNaN(b) || Double.isInfinite(b))
                    b = m_ParamsN[4 * x + 1];

                w = sq * (m_ParamsP[4 * x + 1] - 2.0) / m_ParamsP[4 * x]; // w=Sq/Var, assuming Var=a'/(b'-2)
                if ((w <= ZERO) || Double.isNaN(w) || Double.isInfinite(w))
                    w = m_ParamsN[4 * x + 2];

                pThisParam[0] = a; // a
                pThisParam[1] = b; // b
                pThisParam[2] = w; // w
                pThisParam[3] = m; // m

                // Negative exemplars: next run
                while ((m_SumN[none][x] <= 1.0) || Double.isNaN(m_MeanN[none][x]))
                    none = whichEx.nextInt(nnum);

                a = m_VarianceN[none][x] / (m_SumN[none][x] - 1.0);
                if (a <= ZERO)
                    a = m_ParamsP[4 * x];
                m = m_MeanN[none][x];
                sq = (m - m_ParamsN[4 * x + 3]) * (m - m_ParamsN[4 * x + 3]);

                b = a * m_ParamsN[4 * x + 2] / sq + 2.0; // b=a/Var+2, assuming Var=Sq/w'
                if ((b <= ZERO) || Double.isNaN(b) || Double.isInfinite(b))
                    b = m_ParamsP[4 * x + 1];

                w = sq * (m_ParamsN[4 * x + 1] - 2.0) / m_ParamsN[4 * x]; // w=Sq/Var, assuming Var=a'/(b'-2)
                if ((w <= ZERO) || Double.isNaN(w) || Double.isInfinite(w))
                    w = m_ParamsP[4 * x + 2];

                nThisParam[0] = a; // a
                nThisParam[1] = b; // b
                nThisParam[2] = w; // w
                nThisParam[3] = m; // m
            }
        }
    }

    for (int x = 0, y = 0; x < m_Dimension; x++, y++) {
        if ((x == exs.classIndex()) || (x == exs.idIndex()))
            y++;
        a = m_ParamsP[4 * x];
        b = m_ParamsP[4 * x + 1];
        w = m_ParamsP[4 * x + 2];
        m = m_ParamsP[4 * x + 3];
        System.err.println("\n\n???Positive: ( " + exs.attribute(y) + "): a=" + a + ", b=" + b
                + ", w=" + w + ", m=" + m);

        a = m_ParamsN[4 * x];
        b = m_ParamsN[4 * x + 1];
        w = m_ParamsN[4 * x + 2];
        m = m_ParamsN[4 * x + 3];
        System.err.println("???Negative: (" + exs.attribute(y) + "): a=" + a + ", b=" + b
                + ", w=" + w + ", m=" + m);
    }

    if (m_UseEmpiricalCutOff) {
        // Find the empirical cut-off
        double[] pLogOdds = new double[pnum], nLogOdds = new double[nnum];
        for (int p = 0; p < pnum; p++)
            pLogOdds[p] = likelihoodRatio(m_SumP[p], m_MeanP[p], m_VarianceP[p]);
        for (int q = 0; q < nnum; q++)
            nLogOdds[q] = likelihoodRatio(m_SumN[q], m_MeanN[q], m_VarianceN[q]);

        // Update m_Cutoff
        findCutOff(pLogOdds, nLogOdds);
    } else
        m_Cutoff = -Math.log((double) pnum / (double) nnum);

    System.err.println("???Cut-off=" + m_Cutoff);
}