List of usage examples for weka.core.Instance.value
public double value(Attribute att);
From source file:Classifier.supervised.LinearRegression.java
License:Open Source License
/**
 * Evaluates a linear regression model on one instance.
 *
 * <p>Attributes are visited in index order. Each attribute that is flagged in
 * {@code selectedAttributes} and is not the class attribute ({@code m_ClassIndex})
 * consumes the next entry of {@code coefficients}. The entry right after the last
 * consumed coefficient is added unscaled, i.e. it acts as the intercept.
 *
 * @param transformedInstance the input instance
 * @param selectedAttributes flags telling which attributes are part of the model
 * @param coefficients the model coefficients, followed by the intercept
 * @return the regression value for the instance
 * @throws Exception declared by the signature; this method body contains no explicit throw
 */
protected double regressionPrediction(Instance transformedInstance, boolean[] selectedAttributes,
        double[] coefficients) throws Exception {
    double prediction = 0;
    int next = 0;
    final int attributeCount = transformedInstance.numAttributes();
    for (int att = 0; att < attributeCount; att++) {
        // the class attribute and unselected attributes have no coefficient
        if (att == m_ClassIndex || !selectedAttributes[att]) {
            continue;
        }
        prediction += coefficients[next++] * transformedInstance.value(att);
    }
    // whatever follows the used coefficients is the intercept
    return prediction + coefficients[next];
}
From source file:Classifier.supervised.LinearRegression.java
License:Open Source License
/** * Calculate a linear regression using the selected attributes * * @param selectedAttributes an array of booleans where each element * is true if the corresponding attribute should be included in the * regression.//from w w w . ja v a 2s . c om * @return an array of coefficients for the linear regression model. * @throws Exception if an error occurred during the regression. */ protected double[] doRegression(boolean[] selectedAttributes) throws Exception { if (m_Debug) { System.out.print("doRegression("); for (int i = 0; i < selectedAttributes.length; i++) { System.out.print(" " + selectedAttributes[i]); } System.out.println(" )"); } int numAttributes = 0; for (int i = 0; i < selectedAttributes.length; i++) { if (selectedAttributes[i]) { numAttributes++; } } // Check whether there are still attributes left Matrix independent = null, dependent = null; if (numAttributes > 0) { independent = new Matrix(m_TransformedData.numInstances(), numAttributes); dependent = new Matrix(m_TransformedData.numInstances(), 1); for (int i = 0; i < m_TransformedData.numInstances(); i++) { Instance inst = m_TransformedData.instance(i); double sqrt_weight = Math.sqrt(inst.weight()); int column = 0; for (int j = 0; j < m_TransformedData.numAttributes(); j++) { if (j == m_ClassIndex) { dependent.set(i, 0, inst.classValue() * sqrt_weight); } else { if (selectedAttributes[j]) { double value = inst.value(j) - m_Means[j]; // We only need to do this if we want to // scale the input if (!m_checksTurnedOff) { value /= m_StdDevs[j]; } independent.set(i, column, value * sqrt_weight); column++; } } } } } // Compute coefficients (note that we have to treat the // intercept separately so that it doesn't get affected // by the ridge constant.) 
double[] coefficients = new double[numAttributes + 1]; if (numAttributes > 0) { double[] coeffsWithoutIntercept = independent.regression(dependent, m_Ridge).getCoefficients(); System.arraycopy(coeffsWithoutIntercept, 0, coefficients, 0, numAttributes); } coefficients[numAttributes] = m_ClassMean; // Convert coefficients into original scale int column = 0; for (int i = 0; i < m_TransformedData.numAttributes(); i++) { if ((i != m_TransformedData.classIndex()) && (selectedAttributes[i])) { // We only need to do this if we have scaled the // input. if (!m_checksTurnedOff) { coefficients[column] /= m_StdDevs[i]; } // We have centred the input coefficients[coefficients.length - 1] -= coefficients[column] * m_Means[i]; column++; } } return coefficients; }
From source file:Classifiers.BRkNN.java
License:Open Source License
/** * Select the best value for k by hold-one-out cross-validation. Hamming * Loss is minimized// ww w . jav a 2 s . com * * @throws Exception Potential exception thrown. To be handled in an upper level. */ private void crossValidate() throws Exception { try { // the performance for each different k double[] hammingLoss = new double[cvMaxK]; for (int i = 0; i < cvMaxK; i++) { hammingLoss[i] = 0; } Instances dataSet = train; Instance instance; // the hold out instance Instances neighbours; // the neighboring instances double[] origDistances, convertedDistances; for (int i = 0; i < dataSet.numInstances(); i++) { if (getDebug() && (i % 50 == 0)) { debug("Cross validating " + i + "/" + dataSet.numInstances() + "\r"); } instance = dataSet.instance(i); neighbours = lnn.kNearestNeighbours(instance, cvMaxK); origDistances = lnn.getDistances(); // gathering the true labels for the instance boolean[] trueLabels = new boolean[numLabels]; for (int counter = 0; counter < numLabels; counter++) { int classIdx = labelIndices[counter]; String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx)); trueLabels[counter] = classValue.equals("1"); } // calculate the performance metric for each different k for (int j = cvMaxK; j > 0; j--) { convertedDistances = new double[origDistances.length]; System.arraycopy(origDistances, 0, convertedDistances, 0, origDistances.length); double[] confidences = this.getConfidences(neighbours, convertedDistances); boolean[] bipartition = null; switch (extension) { case NONE: // BRknn MultiLabelOutput results; results = new MultiLabelOutput(confidences, 0.5); bipartition = results.getBipartition(); break; case EXTA: // BRknn-a bipartition = labelsFromConfidences2(confidences); break; case EXTB: // BRknn-b bipartition = labelsFromConfidences3(confidences); break; } double symmetricDifference = 0; // |Y xor Z| for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) { boolean actual = trueLabels[labelIndex]; boolean predicted 
= bipartition[labelIndex]; if (predicted != actual) { symmetricDifference++; } } hammingLoss[j - 1] += (symmetricDifference / numLabels); neighbours = new IBk().pruneToK(neighbours, convertedDistances, j - 1); } } // Display the results of the cross-validation if (getDebug()) { for (int i = cvMaxK; i > 0; i--) { debug("Hold-one-out performance of " + (i) + " neighbors "); debug("(Hamming Loss) = " + hammingLoss[i - 1] / dataSet.numInstances()); } } // Check through the performance stats and select the best // k value (or the lowest k if more than one best) double[] searchStats = hammingLoss; double bestPerformance = Double.NaN; int bestK = 1; for (int i = 0; i < cvMaxK; i++) { if (Double.isNaN(bestPerformance) || (bestPerformance > searchStats[i])) { bestPerformance = searchStats[i]; bestK = i + 1; } } numOfNeighbors = bestK; if (getDebug()) { System.err.println("Selected k = " + bestK); } } catch (Exception ex) { throw new Error("Couldn't optimize by cross-validation: " + ex.getMessage()); } }
From source file:Classifiers.BRkNN.java
License:Open Source License
/** * Calculates the confidences of the labels, based on the neighboring * instances// w ww . j a va 2 s .c om * * @param neighbours * the list of nearest neighboring instances * @param distances * the distances of the neighbors * @return the confidences of the labels */ private double[] getConfidences(Instances neighbours, double[] distances) { double total, weight; double neighborLabels = 0; double[] confidences = new double[numLabels]; // Set up a correction to the estimator for (int i = 0; i < numLabels; i++) { confidences[i] = 1.0 / Math.max(1, train.numInstances()); } total = (double) numLabels / Math.max(1, train.numInstances()); for (int i = 0; i < neighbours.numInstances(); i++) { // Collect class counts Instance current = neighbours.instance(i); distances[i] = distances[i] * distances[i]; distances[i] = Math.sqrt(distances[i] / (train.numAttributes() - numLabels)); weight = 1.0; weight *= current.weight(); for (int j = 0; j < numLabels; j++) { double value = Double.parseDouble( current.attribute(labelIndices[j]).value((int) current.value(labelIndices[j]))); if (Utils.eq(value, 1.0)) { confidences[j] += weight; neighborLabels += weight; } } total += weight; } avgPredictedLabels = (int) Math.round(neighborLabels / total); // Normalise distribution if (total > 0) { Utils.normalize(confidences, total); } return confidences; }
From source file:classify.Classifier.java
public static void missingValuesRows(Instances data) { int[] missingValues = new int[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { missingValues[i] = 0;// w w w .j a v a 2s . c om } Instance example; String value = ""; //get number of missing attributes per row int missValues = 0; for (int i = 0; i < data.numInstances(); i++) { example = data.instance(i); for (int j = 0; j < 15; j++) { if (example.attribute(j).isNominal()) { value = example.stringValue(j); } else if (example.attribute(j).isNumeric()) { value = Double.toString(example.value(j)); } if (value.equals("?") || value.equals("NaN")) { missingValues[i]++; missValues++; } } } System.out.println("Number of Missing Values: " + missValues); //get how many times i attributes are missing int[] frequency = new int[15]; for (int i = 0; i < data.numInstances(); i++) { frequency[missingValues[i]]++; } int numRows = 0; for (int i = 0; i < data.numInstances(); i++) { if (missingValues[i] > 0) { numRows++; } } System.out.println("Number of rows with missing values: " + numRows); System.out.println("Number of missing attributes per row:"); for (int i = 0; i < 15; i++) { System.out.println(i + ": " + frequency[i]); } }
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** This function divides every attribute value in an instance by * the instance weight -- useful to find the mean of a cluster in * Euclidean space /*w w w . j a v a 2 s. c om*/ * @param inst Instance passed in for normalization (destructive update) */ public static void normalizeByWeight(Instance inst) { double weight = inst.weight(); if (inst instanceof SparseInstance) { for (int i = 0; i < inst.numValues(); i++) { inst.setValueSparse(i, inst.valueSparse(i) / weight); } } else if (!(inst instanceof SparseInstance)) { for (int i = 0; i < inst.numAttributes(); i++) { inst.setValue(i, inst.value(i) / weight); } } }
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/**
 * Finds the sum of 2 instances (handles sparse and non-sparse).
 *
 * <p>The result carries the summed attribute values and the summed weights of both inputs,
 * and has {@code m_Instances} set as its dataset. Two sparse inputs give a
 * {@code SparseInstance}; two non-sparse inputs give an {@code Instance}.
 *
 * @param inst1 first instance
 * @param inst2 second instance
 * @param m_Instances dataset assigned to the returned instance
 * @return a new instance holding the attribute-wise sum
 * @throws Exception if the attribute counts differ, or if exactly one of the two instances
 *         is sparse
 */
public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    final int numAttributes = inst1.numAttributes();
    if (numAttributes != inst2.numAttributes()) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }

    final boolean firstSparse = inst1 instanceof SparseInstance;
    final boolean secondSparse = inst2 instanceof SparseInstance;
    if (firstSparse != secondSparse) {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }

    final double totalWeight = inst1.weight() + inst2.weight();
    // a new double[] starts out all zeros
    double[] values = new double[numAttributes];

    Instance sum;
    if (firstSparse) {
        // scatter the stored entries of the first instance, then add those of the second
        for (int k = 0; k < inst1.numValues(); k++) {
            values[inst1.index(k)] = inst1.valueSparse(k);
        }
        for (int k = 0; k < inst2.numValues(); k++) {
            values[inst2.index(k)] += inst2.valueSparse(k);
        }
        sum = new SparseInstance(totalWeight, values);
    } else {
        for (int att = 0; att < numAttributes; att++) {
            values[att] = inst1.value(att) + inst2.value(att);
        }
        sum = new Instance(totalWeight, values);
    }

    sum.setDataset(m_Instances);
    return sum;
}
From source file:clusterer.SimpleKMeansWithSilhouette.java
License:Open Source License
/** * Move the centroid to it's new coordinates. Generate the centroid * coordinates based on it's members (objects assigned to the cluster of the * centroid) and the distance function being used. * /* ww w .j a va 2 s . co m*/ * @param centroidIndex index of the centroid which the coordinates will be * computed * @param members the objects that are assigned to the cluster of this * centroid * @param updateClusterInfo if the method is supposed to update the m_Cluster * arrays * @param addToCentroidInstances true if the method is to add the computed * coordinates to the Instances holding the centroids * @return the centroid coordinates */ protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo, boolean addToCentroidInstances) { double[] vals = new double[members.numAttributes()]; double[][] nominalDists = new double[members.numAttributes()][]; double[] weightMissing = new double[members.numAttributes()]; double[] weightNonMissing = new double[members.numAttributes()]; // Quickly calculate some relevant statistics for (int j = 0; j < members.numAttributes(); j++) { if (members.attribute(j).isNominal()) { nominalDists[j] = new double[members.attribute(j).numValues()]; } } for (Instance inst : members) { for (int j = 0; j < members.numAttributes(); j++) { if (inst.isMissing(j)) { weightMissing[j] += inst.weight(); } else { weightNonMissing[j] += inst.weight(); if (members.attribute(j).isNumeric()) { vals[j] += inst.weight() * inst.value(j); // Will be overwritten in Manhattan case } else { nominalDists[j][(int) inst.value(j)] += inst.weight(); } } } } for (int j = 0; j < members.numAttributes(); j++) { if (members.attribute(j).isNumeric()) { if (weightNonMissing[j] > 0) { vals[j] /= weightNonMissing[j]; } else { vals[j] = Utils.missingValue(); } } else { double max = -Double.MAX_VALUE; double maxIndex = -1; for (int i = 0; i < nominalDists[j].length; i++) { if (nominalDists[j][i] > max) { max = nominalDists[j][i]; maxIndex = i; } 
if (max < weightMissing[j]) { vals[j] = Utils.missingValue(); } else { vals[j] = maxIndex; } } } } if (m_DistanceFunction instanceof ManhattanDistance) { // Need to replace means by medians Instances sortedMembers = null; int middle = (members.numInstances() - 1) / 2; boolean dataIsEven = ((members.numInstances() % 2) == 0); if (m_PreserveOrder) { sortedMembers = members; } else { sortedMembers = new Instances(members); } for (int j = 0; j < members.numAttributes(); j++) { if ((weightNonMissing[j] > 0) && members.attribute(j).isNumeric()) { // singleton special case if (members.numInstances() == 1) { vals[j] = members.instance(0).value(j); } else { vals[j] = sortedMembers.kthSmallestValue(j, middle + 1); if (dataIsEven) { vals[j] = (vals[j] + sortedMembers.kthSmallestValue(j, middle + 2)) / 2; } } } } } if (updateClusterInfo) { for (int j = 0; j < members.numAttributes(); j++) { m_ClusterMissingCounts[centroidIndex][j] = weightMissing[j]; m_ClusterNominalCounts[centroidIndex][j] = nominalDists[j]; } } if (addToCentroidInstances) { m_ClusterCentroids.add(new DenseInstance(1.0, vals)); } return vals; }
From source file:cn.edu.xjtu.dbmine.source.NaiveBayes.java
License:Open Source License
/** * Generates the classifier./*from w w w. j a v a 2s. co m*/ * * @param instances set of instances serving as training data * @exception Exception if the classifier has not been generated * successfully */ public void buildClassifier(Instances instances) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); m_NumClasses = instances.numClasses(); // Copy the instances m_Instances = new Instances(instances); // Discretize instances if required if (m_UseDiscretization) { m_Disc = new weka.filters.supervised.attribute.Discretize(); m_Disc.setInputFormat(m_Instances); m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc); } else { m_Disc = null; } // Reserve space for the distributions m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()]; m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true); int attIndex = 0; Enumeration enu = m_Instances.enumerateAttributes(); while (enu.hasMoreElements()) { Attribute attribute = (Attribute) enu.nextElement(); // If the attribute is numeric, determine the estimator // numeric precision from differences between adjacent values double numPrecision = DEFAULT_NUM_PRECISION; if (attribute.type() == Attribute.NUMERIC) { m_Instances.sort(attribute); if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) { double lastVal = m_Instances.instance(0).value(attribute); double currentVal, deltaSum = 0; int distinct = 0; for (int i = 1; i < m_Instances.numInstances(); i++) { Instance currentInst = m_Instances.instance(i); if (currentInst.isMissing(attribute)) { break; } currentVal = currentInst.value(attribute); if (currentVal != lastVal) { deltaSum += currentVal - lastVal; lastVal = currentVal; distinct++; } } if (distinct > 0) { numPrecision = deltaSum / distinct; } } } for (int j = 0; j < 
m_Instances.numClasses(); j++) { switch (attribute.type()) { case Attribute.NUMERIC: if (m_UseKernelEstimator) { m_Distributions[attIndex][j] = new KernelEstimator(numPrecision); } else { m_Distributions[attIndex][j] = new NormalEstimator(numPrecision); } break; case Attribute.NOMINAL: m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true); break; default: throw new Exception("Attribute type unknown to NaiveBayes"); } } attIndex++; } // Compute counts Enumeration enumInsts = m_Instances.enumerateInstances(); while (enumInsts.hasMoreElements()) { Instance instance = (Instance) enumInsts.nextElement(); updateClassifier(instance); } // Save space m_Instances = new Instances(m_Instances, 0); }
From source file:cn.edu.xjtu.dbmine.source.NaiveBayes.java
License:Open Source License
/**
 * Updates the classifier with the given instance.
 *
 * <p>Instances whose class is missing are ignored. Otherwise every non-missing attribute
 * value is added, with the instance weight, to the estimator of that attribute for the
 * instance's class, and the class value is added to the class distribution.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in the model.
 */
public void updateClassifier(Instance instance) throws Exception {
    if (instance.classIsMissing()) {
        return;
    }

    int attIndex = 0;
    for (Enumeration enumAtts = m_Instances.enumerateAttributes(); enumAtts.hasMoreElements(); attIndex++) {
        Attribute attribute = (Attribute) enumAtts.nextElement();
        if (instance.isMissing(attribute)) {
            continue; // the slot index still advances via the loop update
        }
        m_Distributions[attIndex][(int) instance.classValue()]
                .addValue(instance.value(attribute), instance.weight());
    }
    m_ClassDistribution.addValue(instance.classValue(), instance.weight());
}