List of usage examples for weka.core.Instance.value()
public double value(int attIndex);
public double value(Attribute att);
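Most of the examples below use one of these two call patterns: look-up by attribute index or by Attribute object. A minimal sketch of both, assuming a dataset loaded from an ARFF file (the file name is illustrative):

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ValueDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new DataSource("iris.arff").getDataSet();
        Instance first = data.instance(0);
        // By index: the numeric value for a numeric attribute, or the
        // index into the attribute's value list for a nominal one
        System.out.println(first.value(0));
        // By Attribute object: same value, resolved via the attribute
        Attribute att = data.attribute(0);
        System.out.println(first.value(att));
    }
}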
From source file:machinelearningq2.ExtendedNaiveBayes.java
public Instances discretize(Instances instnc) throws Exception {
    Discretize d = new Discretize();
    d.setInputFormat(instnc);
    Instances newData = Filter.useFilter(instnc, d);
    // Count how often each bin index occurs across all non-class attributes
    binCount = new double[d.getBins()];
    for (Instance line : newData) {
        for (int j = 0; j < newData.numAttributes() - 1; j++) {
            binCount[(int) line.value(j)]++;
        }
    }
    return newData;
}
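Once the filter has run, every attribute is nominal and value(j) returns the bin index, which is what makes the binCount indexing above work. A minimal configuration sketch, assuming the unsupervised Discretize filter with the same fixed bin count for every attribute:

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Discretize;

public class DiscretizeSketch {
    static Instances discretizeToBins(Instances data, int bins) throws Exception {
        Discretize d = new Discretize();
        // A uniform bin count lets a single array be indexed by (int) value(j)
        d.setBins(bins);
        d.setInputFormat(data);
        return Filter.useFilter(data, d);
    }
}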
From source file:machinelearningq2.ExtendedNaiveBayes.java
/**
 * Works out the probabilities of class membership for a single instance.
 *
 * @param instnc the instance to classify
 * @return the class distribution for the instance
 * @throws Exception if the distribution cannot be computed
 */
@Override
public double[] distributionForInstance(Instance instnc) throws Exception {
    if ("d".equals(gausianOrDiscretise)) {
        return super.distributionForInstance(instnc);
    }
    // Stores the naive Bayes score for each class
    double[] prediction = new double[classValueCounts.length];
    for (int c = 0; c < classValueCounts.length; c++) {
        ArrayList<Double> likelihoods = new ArrayList<>();
        double priorProbability = classValueCounts[c] / countData;
        likelihoods.add(priorProbability);
        for (int i = 0; i < instnc.numAttributes() - 1; i++) {
            double currentMean = attributeMeans[c][i];
            double currentVariance = attributeVariance[c][i];
            double attributeValue = instnc.value(i);
            // NOTE: this expression is the normal density only if
            // attributeVariance actually stores standard deviations; if it
            // stores variances, the correct form is
            // exp(-(x - mean)^2 / (2 * variance)) / sqrt(2 * PI * variance)
            double likelihood = 1 / (Math.sqrt(2 * Math.PI) * currentVariance)
                    * Math.exp(-Math.pow(attributeValue - currentMean, 2) / (2 * Math.pow(currentVariance, 2)));
            likelihoods.add(likelihood);
        }
        // Multiply the prior and the likelihoods; the result is an
        // unnormalised joint score, which still ranks classes correctly
        double total = 1;
        for (Double x : likelihoods) {
            total *= x;
        }
        prediction[c] = total;
    }
    return prediction;
}
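The density computation above is the part most likely to go wrong. A minimal standalone sketch of the Gaussian likelihood, assuming the per-class statistic really is a variance (all names here are illustrative, not from the original source):

public class GaussianSketch {
    // Probability density of x under N(mean, variance)
    static double gaussianLikelihood(double x, double mean, double variance) {
        double diff = x - mean;
        return Math.exp(-diff * diff / (2 * variance)) / Math.sqrt(2 * Math.PI * variance);
    }
}

Called as gaussianLikelihood(instnc.value(i), attributeMeans[c][i], attributeVariance[c][i]), this would replace the inline expression in the loop above.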
From source file:machinelearningq2.ExtendedNaiveBayes.java
/**
 * Works out the probabilities of class membership for a single instance
 * using the discrete (frequency-count) model.
 *
 * @param instnc the instance to classify
 * @return the class distribution for the instance
 * @throws Exception if the distribution cannot be computed
 */
public double[] distributionForDiscrete(Instance instnc) throws Exception {
    // Stores the naive Bayes score for each class
    double[] naiveBayes = new double[classValueCounts.length];
    // Loops through each class and computes the naive Bayes score
    for (int c = 0; c < naiveBayes.length; c++) {
        // Stores the prior probability, e.g. P(crime=1), followed by the
        // conditional probabilities for class membership, e.g.
        // P(struct=0|crime=1), P(security=1|crime=1), P(area=1|crime=1)
        ArrayList<Double> conditionalProbs = new ArrayList<>();
        double priorProbability = classValueCounts[c] / countData;
        conditionalProbs.add(priorProbability);
        // For a dense instance numValues() equals numAttributes(); for a
        // sparse instance it only counts the stored values
        for (int i = 0; i < instnc.numValues() - 1; i++) {
            double attributeValue = instnc.value(i);
            DataFound d = new DataFound(attributeValue, c, i);
            int index = data.indexOf(d);
            if (index != -1) {
                double classValueCount = classValueCounts[(int) d.getClassValue()];
                conditionalProbs.add(data.get(index).getConditionalProbability((int) classValueCount));
            }
        }
        // Multiply the prior and the conditionals together
        double total = 1;
        for (Double x : conditionalProbs) {
            total *= x;
        }
        naiveBayes[c] = total;
    }
    return naiveBayes;
}
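DataFound and its getConditionalProbability method are project-specific and not shown. A minimal sketch of what such a conditional probability commonly looks like with add-one (Laplace) smoothing, assuming simple co-occurrence counts (all names are illustrative):

public class ConditionalSketch {
    // P(attribute = v | class = c) with Laplace smoothing, so an unseen
    // attribute value never zeroes out the whole product of probabilities
    static double conditionalProbability(int jointCount, int classCount, int numAttributeValues) {
        return (jointCount + 1.0) / (classCount + numAttributeValues);
    }
}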
From source file:machinelearning_cw.KNN.java
@Override
public void buildClassifier(Instances data) throws Exception {
    if (useStandardisedAttributes) {
        mean = new double[data.numAttributes() - 1];
        standardDeviation = new double[data.numAttributes() - 1];
        // For each data attribute
        for (int i = 0; i < data.numAttributes() - 1; i++) {
            // Calculate mean and standard deviation
            double[] meanAndStdDev = Helpers.meanAndStandardDeviation(data, i);
            double mean = meanAndStdDev[0];
            double stdDev = meanAndStdDev[1];
            this.mean[i] = mean;
            this.standardDeviation[i] = stdDev;
            isMeanAndStdDevInitialised = true;
            // Standardise the values in all instances for the given attribute
            for (Instance eachInstance : data) {
                double value = eachInstance.value(i);
                double standardisedValue = (value - mean) / stdDev;
                eachInstance.setValue(i, standardisedValue);
            }
        }
    }
    trainingData = new Instances(data);
    if (autoDetermineK) {
        determineK();
    }
}
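Helpers.meanAndStandardDeviation is project-specific; a minimal sketch of what it likely computes, using only the Weka Instances API (the method name mirrors the call above, but the body is an assumption):

import weka.core.Instances;

public class StatsSketch {
    static double[] meanAndStandardDeviation(Instances data, int attIndex) {
        double[] values = data.attributeToDoubleArray(attIndex);
        double sum = 0;
        for (double v : values) {
            sum += v;
        }
        double mean = sum / values.length;
        double sqDiffSum = 0;
        for (double v : values) {
            sqDiffSum += (v - mean) * (v - mean);
        }
        // Population form; divide by (values.length - 1) for the sample form
        double stdDev = Math.sqrt(sqDiffSum / values.length);
        return new double[] { mean, stdDev };
    }
}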
From source file:machinelearning_cw.KNN.java
@Override
public double classifyInstance(Instance instance) throws Exception {
    // Check that the classifier has been trained
    if (trainingData == null) {
        throw new Exception("Classifier has not been trained."
                + " No call to buildClassifier() was made");
    }
    if (useStandardisedAttributes) {
        if (!isMeanAndStdDevInitialised) {
            throw new Exception("Mean and standard deviation have not been initialised");
        } else {
            /* Standardise the test instance with the training statistics */
            for (int i = 0; i < instance.numAttributes() - 1; i++) {
                double value = instance.value(i);
                double standardisedValue = (value - mean[i]) / standardDeviation[i];
                instance.setValue(i, standardisedValue);
            }
        }
    }
    if (!useWeightedVoting) {
        return super.classifyInstance(instance);
    } else {
        if (!useAcceleratedNNSearch) {
            /* Calculate Euclidean distances */
            double[] distances = Helpers.findEuclideanDistances(trainingData, instance);
            /*
             * Create a list of dictionaries where each dictionary contains
             * the keys "distance", "weight" and "id". The distance key
             * stores the Euclidean distance for an instance and the id key
             * stores the hashcode for that instance object.
             */
            ArrayList<HashMap<String, Object>> table = Helpers.buildDistanceTable(trainingData, distances);
            /* Find the k smallest distances */
            Object[] kClosestRows = new Object[k];
            Object[] kClosestInstances = new Object[k];
            for (int i = 1; i <= k; i++) {
                ArrayList<Integer> tieIndices = new ArrayList<Integer>();
                /* Find the positions in the table of the ith closest neighbour */
                int[] closestRowIndices = this.findNthClosestNeighbourByWeights(table, i);
                if (closestRowIndices.length > 0) {
                    /* Keep track of distance ties */
                    for (int j = 0; j < closestRowIndices.length; j++) {
                        tieIndices.add(closestRowIndices[j]);
                    }
                    /* Break ties by choosing a winner at random */
                    Random rand = new Random();
                    int matchingNeighbourPosition = tieIndices.get(rand.nextInt(tieIndices.size()));
                    HashMap<String, Object> matchingRow = table.get(matchingNeighbourPosition);
                    kClosestRows[i - 1] = matchingRow;
                }
            }
            /*
             * Find the closest instances from their rows in the table and
             * also get their class values.
             */
            for (int i = 0; i < kClosestRows.length; i++) {
                /* Build up the closestInstances array */
                for (int j = 0; j < trainingData.numInstances(); j++) {
                    Instance inst = trainingData.get(j);
                    HashMap<String, Object> row = (HashMap<String, Object>) kClosestRows[i];
                    if (Integer.toHexString(inst.hashCode()).equals(row.get("id"))) {
                        kClosestInstances[i] = inst;
                    }
                }
            }
            /* Vote by weights: first find the max class value */
            double[] possibleClassValues = trainingData.attributeToDoubleArray(trainingData.classIndex());
            int maxClassIndex = Utils.maxIndex(possibleClassValues);
            double maxClassValue = possibleClassValues[maxClassIndex];
            ArrayList<Double> weightedVotes = new ArrayList<Double>();
            /* Calculate the sum of votes for each class */
            for (double i = 0; i <= maxClassValue; i++) {
                double weightCount = 0;
                for (int j = 0; j < kClosestInstances.length; j++) {
                    Instance candidateInstance = (Instance) kClosestInstances[j];
                    if (candidateInstance.classValue() == i) {
                        // Add this neighbour's weight to its class's vote
                        HashMap<String, Object> row = (HashMap<String, Object>) kClosestRows[j];
                        weightCount += (double) row.get("weight");
                    }
                }
                weightedVotes.add(weightCount);
            }
            /* Select the class with the highest vote */
            Double[] votesArray = new Double[weightedVotes.size()];
            weightedVotes.toArray(votesArray);
            double greatestSoFar = votesArray[0];
            int greatestIndex = 0;
            for (int i = 0; i < votesArray.length; i++) {
                if (votesArray[i] > greatestSoFar) {
                    greatestSoFar = votesArray[i];
                    greatestIndex = i;
                }
            }
            /*
             * The class value is the index because classes are indexed
             * from 0 upwards.
             */
            return greatestIndex;
        } else {
            /* Use Orchard's algorithm to accelerate the NN search */
            ArrayList<Instance> nearestNeighbours = new ArrayList<Instance>();
            for (int i = 0; i < k; i++) {
                nearestNeighbours.add(findNthClosestWithOrchards(instance, trainingData, i));
            }
            // Find their class values
            double[] classValues = new double[nearestNeighbours.size()];
            for (int i = 0; i < nearestNeighbours.size(); i++) {
                classValues[i] = nearestNeighbours.get(i).classValue();
            }
            return Helpers.mode(Helpers.arrayToArrayList(classValues));
        }
    }
}
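Helpers.buildDistanceTable fills in each neighbour's "weight" entry, but the weighting scheme itself is not shown. A common choice is inverse-distance weighting; a minimal sketch, with the epsilon guard an assumption to avoid division by zero on exact matches:

public class WeightSketch {
    // Closer neighbours get larger votes; epsilon avoids division by zero
    static double inverseDistanceWeight(double distance) {
        final double EPSILON = 1e-9;
        return 1.0 / (distance + EPSILON);
    }
}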
From source file:machine_learing_clasifier.MyC45.java
public double classifyInstance(Instance instance) {
    if (m_Attribute == null) {
        // Leaf node: return the stored class value
        return m_ClassValue;
    } else {
        if (m_Attribute.isNominal()) {
            // Follow the branch matching the instance's nominal value
            return m_Successors[(int) instance.value(m_Attribute)].classifyInstance(instance);
        } else if (m_Attribute.isNumeric()) {
            // Numeric split: left subtree below the threshold, right otherwise
            if (instance.value(m_Attribute) < numericAttThreshold) {
                return m_Successors[0].classifyInstance(instance);
            } else {
                return m_Successors[1].classifyInstance(instance);
            }
        } else {
            return -1;
        }
    }
}
From source file:machine_learing_clasifier.MyC45.java
public double BestContinousAttribute(Instances i, Attribute att) {
    i.sort(att);
    Enumeration enumForMissingAttr = i.enumerateInstances();
    double temp = i.get(0).classValue();
    double igtemp = 0;
    double bestthreshold = 0;
    double a;
    double b = i.get(0).value(att);
    while (enumForMissingAttr.hasMoreElements()) {
        Instance inst = (Instance) enumForMissingAttr.nextElement();
        // Only consider thresholds where the class label changes
        if (temp != inst.classValue()) {
            temp = inst.classValue();
            a = b;
            b = inst.value(att);
            // Candidate threshold: midpoint between the attribute values
            // at the previous and current class-change points
            double threshold = a + ((b - a) / 2);
            double igtemp2 = computeInformationGainContinous(i, att, threshold);
            if (igtemp < igtemp2) {
                bestthreshold = threshold;
                igtemp = igtemp2;
            }
        }
    }
    return bestthreshold;
}
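computeInformationGainContinous is defined elsewhere in this source file. For reference, a minimal sketch of the entropy that information gain is built from, assuming class counts have already been tallied for each side of the split; the gain is then the parent entropy minus the size-weighted child entropies:

public class EntropySketch {
    // H = -sum(p * log2(p)) over classes with non-zero counts
    static double entropy(int[] classCounts) {
        int total = 0;
        for (int c : classCounts) {
            total += c;
        }
        double h = 0;
        for (int c : classCounts) {
            if (c > 0) {
                double p = (double) c / total;
                h -= p * (Math.log(p) / Math.log(2));
            }
        }
        return h;
    }
}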
From source file:machine_learing_clasifier.MyID3.java
public double classifyInstance(Instance instance) {
    if (m_Attribute == null) {
        // Leaf node: return the stored class value
        return m_ClassValue;
    } else {
        // Follow the branch matching the instance's nominal value
        return m_Successors[(int) instance.value(m_Attribute)].classifyInstance(instance);
    }
}
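The cast (int) instance.value(m_Attribute) works because, for a nominal attribute, value() returns the position of the instance's value in the attribute's value list. A minimal sketch of mapping that index back to its label (names are illustrative):

import weka.core.Attribute;
import weka.core.Instance;

public class NominalSketch {
    static String nominalLabel(Instance instance, Attribute att) {
        // value() returns the nominal value's index as a double
        int index = (int) instance.value(att);
        // att.value(index) maps it back to the label;
        // instance.stringValue(att) is the equivalent shortcut
        return att.value(index);
    }
}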
From source file:main.NaiveBayes.java
License:Open Source License
/**
 * Generates the classifier.
 *
 * @param instances set of instances serving as training data
 * @exception Exception if the classifier has not been generated successfully
 */
@Override
public void buildClassifier(Instances instances) throws Exception {
    // Can the classifier handle the data?
    getCapabilities().testWithFail(instances);
    // Remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    m_NumClasses = instances.numClasses();
    // Copy the instances
    m_Instances = new Instances(instances);
    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }
    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration<Attribute> enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = enu.nextElement();
        // If the attribute is numeric, determine the estimator's numeric
        // precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch (attribute.type()) {
            case Attribute.NUMERIC:
                if (m_UseKernelEstimator) {
                    m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                } else {
                    m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                }
                break;
            case Attribute.NOMINAL:
                m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                break;
            default:
                throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }
    // Compute counts
    Enumeration<Instance> enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = enumInsts.nextElement();
        updateClassifier(instance);
    }
    // Save space
    m_Instances = new Instances(m_Instances, 0);
}
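A minimal usage sketch, assuming this class exposes the same interface as the stock weka.classifiers.bayes.NaiveBayes and that a weather.arff file is available (both assumptions are illustrative):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NaiveBayesDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset and declare the last attribute as the class
        Instances data = new DataSource("weather.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);
        // Train and inspect the class distribution for one instance
        NaiveBayes nb = new NaiveBayes();
        nb.buildClassifier(data);
        double[] dist = nb.distributionForInstance(data.instance(0));
        System.out.println(java.util.Arrays.toString(dist));
    }
}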
From source file:main.NaiveBayes.java
License:Open Source License
/**
 * Updates the classifier with the given instance.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in the
 *              model.
 */
public void updateClassifier(Instance instance) throws Exception {
    if (!instance.classIsMissing()) {
        Enumeration<Attribute> enumAtts = m_Instances.enumerateAttributes();
        int attIndex = 0;
        while (enumAtts.hasMoreElements()) {
            Attribute attribute = enumAtts.nextElement();
            if (!instance.isMissing(attribute)) {
                m_Distributions[attIndex][(int) instance.classValue()].addValue(instance.value(attribute),
                        instance.weight());
            }
            attIndex++;
        }
        m_ClassDistribution.addValue(instance.classValue(), instance.weight());
    }
}
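Because each call folds a single instance into the estimators, the model can be extended after training. A minimal sketch of streaming one new labelled example into a built classifier; the raw-values array and header handling are assumptions about the surrounding code:

import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class UpdateSketch {
    static void addTrainingExample(NaiveBayes nb, Instances header, double[] attValues) throws Exception {
        // Tie the raw values to the dataset header so nominal indices
        // and the class index resolve correctly
        Instance inst = new DenseInstance(1.0, attValues);
        inst.setDataset(header);
        nb.updateClassifier(inst);
    }
}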