List of usage examples for weka.core.Instance.value
public double value(Attribute att);
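Before the examples, a minimal sketch of the call itself (assuming Weka 3.7+, where instances are created via DenseInstance; the dataset and attribute names are illustrative, not from any of the projects below). Both value(Attribute) and value(int) return the instance's internal double representation:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class ValueDemo {
    public static void main(String[] args) {
        // Two numeric attributes
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("height"));
        attrs.add(new Attribute("weight"));
        Instances data = new Instances("demo", attrs, 1);

        Instance inst = new DenseInstance(2);
        inst.setDataset(data); // gives the instance access to attribute information
        inst.setValue(0, 1.80);
        inst.setValue(1, 75.0);

        // Same internal value, accessed by Attribute or by index
        System.out.println(inst.value(data.attribute("height"))); // 1.8
        System.out.println(inst.value(0));                        // 1.8
    }
}

For a nominal attribute, the returned double is the index of the value in the attribute's list of possible values, not a number parsed from the label.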
From source file:kea.NumbersFilter.java
License:Open Source License
/**
 * Converts an instance. A phrase boundary is inserted where
 * a number is found.
 */
private void convertInstance(Instance instance) throws Exception {
    double[] instVals = new double[instance.numAttributes()];
    for (int i = 0; i < instance.numAttributes(); i++) {
        if ((!instance.attribute(i).isString()) || instance.isMissing(i)) {
            instVals[i] = instance.value(i);
        } else {
            String str = instance.stringValue(i);
            StringBuffer resultStr = new StringBuffer();
            StringTokenizer tok = new StringTokenizer(str, " \t\n", true);
            while (tok.hasMoreTokens()) {
                String token = tok.nextToken();
                // Everything that doesn't contain at least
                // one letter is considered to be a number
                boolean isNumber = true;
                for (int j = 0; j < token.length(); j++) {
                    if (Character.isLetter(token.charAt(j))) {
                        isNumber = false;
                        break;
                    }
                }
                if (!isNumber) {
                    resultStr.append(token);
                } else {
                    if (token.equals(" ") || token.equals("\t") || token.equals("\n")) {
                        resultStr.append(token);
                    } else {
                        // Replace the number with a phrase boundary
                        resultStr.append(" \n ");
                    }
                }
            }
            int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString());
            instVals[i] = (double) index;
        }
    }
    // Old Weka API (pre-3.7): Instance was a concrete class
    Instance inst = new Instance(instance.weight(), instVals);
    inst.setDataset(getOutputFormat());
    push(inst);
}
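As with other Weka filters, the conversion above would be driven through the standard Filter contract; a minimal usage sketch (assuming KEA's NumbersFilter follows that contract, with data being Instances that contain string attributes):

NumbersFilter filter = new NumbersFilter();
filter.setInputFormat(data);
Instances converted = Filter.useFilter(data, filter);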
From source file:kmeans.MyKMeans.java
void updateCentroidForNumeric(int numCentroid, int numAttr) {
    List<Integer> listInst = listClusteredInstance.get(numCentroid);
    Attribute attr = dataSource.attribute(numAttr);
    if (listInst.isEmpty()) {
        return; // guard against division by zero for an empty cluster
    }
    // Average the attribute over all instances assigned to this centroid
    double sum = 0;
    for (int i = 0; i < listInst.size(); i++) {
        Instance inst = dataSource.get(listInst.get(i));
        sum += inst.value(attr);
    }
    double newValue = sum / listInst.size();
    Instance tempCentroid = centroid.get(numCentroid);
    tempCentroid.setValue(attr, newValue);
    centroid.set(numCentroid, tempCentroid);
}
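The source presumably handles nominal attributes separately; a hypothetical counterpart (not from MyKMeans) would replace the mean with the mode, using the fact that Instance.value returns a nominal value's index as a double:

void updateCentroidForNominal(int numCentroid, int numAttr) {
    List<Integer> listInst = listClusteredInstance.get(numCentroid);
    Attribute attr = dataSource.attribute(numAttr);
    if (listInst.isEmpty()) {
        return;
    }
    // Count occurrences of each nominal index within the cluster
    int[] counts = new int[attr.numValues()];
    for (int i = 0; i < listInst.size(); i++) {
        counts[(int) dataSource.get(listInst.get(i)).value(attr)]++;
    }
    // The centroid takes the most frequent value (the mode)
    int mode = 0;
    for (int v = 1; v < counts.length; v++) {
        if (counts[v] > counts[mode]) {
            mode = v;
        }
    }
    centroid.get(numCentroid).setValue(attr, mode);
}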
From source file:knn.KNNClassifier.java
public static double getClassification(List<Instance> instances) {
    // Majority vote over the class values of the given instances
    int index = instances.get(0).classIndex();
    HashMap<Double, Integer> counts = new HashMap<>();
    int maxCount = 0;
    double maxValue = 0;
    // Tally each instance's class value (for a nominal class,
    // value(index) is the index of the class label)
    for (Instance instance : instances) {
        double val = instance.value(index);
        if (!counts.containsKey(val)) {
            counts.put(val, 1);
        } else {
            counts.put(val, counts.get(val) + 1);
        }
    }
    // Return the class value with the highest count
    for (Entry<Double, Integer> entry : counts.entrySet()) {
        if (entry.getValue() > maxCount) {
            maxCount = entry.getValue();
            maxValue = entry.getKey();
        }
    }
    return maxValue;
}
From source file:knn.KNNClassifier.java
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {
    double distance = 0;
    // Iterate over shared attributes, skipping the last one (the class attribute)
    for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) {
        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            // Squared difference for numeric attributes
            distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2);
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
            // Fixed penalty for mismatched non-numeric attributes; equal values add nothing
            distance += 5;
        }
    }
    return distance;
}
From source file:knn.KNNClassifier.java
double ManhattanDistance(Instance instanceLHS, Instance instanceRHS) {
    double distance = 0;
    // Iterate over shared attributes, skipping the last one (the class attribute)
    for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) {
        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            // Absolute difference for numeric attributes
            distance += abs(instanceLHS.value(i) - instanceRHS.value(i));
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
            // Fixed penalty for mismatched non-numeric attributes; equal values add nothing
            distance += 5;
        }
    }
    return distance;
}
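These distance functions are typically paired with a neighbor search; a hypothetical helper (not from the source) that selects the k nearest training instances, whose result can be fed to getClassification above for the majority vote:

List<Instance> kNearest(Instance query, Instances train, int k) {
    List<Instance> all = new ArrayList<>();
    for (int i = 0; i < train.numInstances(); i++) {
        all.add(train.instance(i));
    }
    // Sort ascending by distance to the query
    all.sort((a, b) -> Double.compare(EuclideanDistance(query, a),
                                      EuclideanDistance(query, b)));
    return all.subList(0, Math.min(k, all.size()));
}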
From source file:lineage.AAFClusterer.java
License:Open Source License
/**
 * K-Means Clustering
 * @param data - matrix of observations (numObs x numFeatures)
 * @param k - number of clusters
 */
public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);
    // uses Euclidean distance by default
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setPreserveInstancesOrder(true);
        clusterer.setNumClusters(k);
        clusterer.buildClusterer(ds);

        // cluster centers
        Instances centers = clusterer.getClusterCentroids();
        Cluster[] clusters = new Cluster[centers.numInstances()];
        for (int i = 0; i < centers.numInstances(); i++) {
            Instance inst = centers.instance(i);
            double[] mean = new double[inst.numAttributes()];
            for (int j = 0; j < mean.length; j++) {
                mean[j] = inst.value(j);
            }
            clusters[i] = new Cluster(mean, i);
        }

        // cluster members
        int[] assignments = clusterer.getAssignments();
        for (int i = 0; i < assignments.length; i++) {
            clusters[assignments[i]].addMember(i);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}
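The helper convertMatrixToWeka is not shown in this excerpt; a plausible sketch (assuming Weka 3.7+ and unit instance weights; the attribute names are made up) that wraps the observation matrix in an Instances object:

private static Instances convertMatrixToWeka(double[][] data, int numObs, int numFeatures) {
    // One numeric attribute per feature column
    ArrayList<Attribute> attrs = new ArrayList<Attribute>();
    for (int j = 0; j < numFeatures; j++) {
        attrs.add(new Attribute("feature" + j));
    }
    Instances ds = new Instances("observations", attrs, numObs);
    for (int i = 0; i < numObs; i++) {
        ds.add(new DenseInstance(1.0, data[i]));
    }
    return ds;
}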
From source file:lineage.AAFClusterer.java
License:Open Source License
/**
 * Expectation-Maximization clustering.
 * The number of clusters is chosen automatically by EM via cross-validation.
 * @param data - matrix of observations (numObs x numFeatures)
 */
public Cluster[] em(double[][] data, int numObs, int numFeatures) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);
    EM clusterer = new EM();
    try {
        clusterer.buildClusterer(ds);
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(new Instances(ds));
        int numClusters = eval.getNumClusters();
        Cluster[] clusters = new Cluster[numClusters];
        // Accumulate per-cluster sums to compute the centroids
        double[][] clusterCentroids = new double[numClusters][numFeatures];
        int[] clusterCount = new int[numClusters];
        double[] assignments = eval.getClusterAssignments();
        for (int i = 0; i < ds.numInstances(); i++) {
            Instance inst = ds.instance(i);
            int clusterId = (int) assignments[i];
            for (int j = 0; j < numFeatures; j++) {
                clusterCentroids[clusterId][j] += inst.value(j);
            }
            clusterCount[clusterId]++;
        }
        for (int i = 0; i < numClusters; i++) {
            double[] mean = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                mean[j] = clusterCentroids[i][j] / clusterCount[i];
            }
            clusters[i] = new Cluster(mean, i);
        }
        // cluster members & std dev
        double[][] clusterStdDev = new double[numClusters][numFeatures];
        for (int i = 0; i < ds.numInstances(); i++) {
            int clusterId = (int) assignments[i];
            clusters[clusterId].addMember(i);
            for (int j = 0; j < numFeatures; j++) {
                clusterStdDev[clusterId][j] += Math
                        .pow(ds.instance(i).value(j) - clusters[clusterId].getCentroid()[j], 2);
            }
        }
        for (int i = 0; i < numClusters; i++) {
            double[] dev = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                dev[j] = Math.sqrt(clusterStdDev[i][j] / clusterCount[i]);
            }
            clusters[i].setStdDev(dev);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}
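EM already fits a Gaussian per cluster and attribute, so the means and standard deviations computed manually above can also be read from the fitted model; a sketch (assuming Weka's EM.getClusterModelsNumericAtts(), which returns per-cluster, per-attribute distribution parameters):

// [cluster][attribute][0] = mean, [cluster][attribute][1] = std. dev.
double[][][] model = clusterer.getClusterModelsNumericAtts();
for (int c = 0; c < model.length; c++) {
    for (int j = 0; j < numFeatures; j++) {
        System.out.printf("cluster %d, feature %d: mean=%.4f, sd=%.4f%n",
                c, j, model[c][j][0], model[c][j][1]);
    }
}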
From source file:LogReg.Logistic.java
License:Open Source License
/**
 * Builds the classifier.
 *
 * @param train the training data to be used for generating the classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class
    train = new Instances(train);
    train.deleteWithMissingClass();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // Save the structure for printing the model
    m_structure = new Instances(train, 0);

    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nK = m_NumClasses - 1; // Only K-1 class labels needed
    int nR = m_NumPredictors = train.numAttributes() - 1;
    int nC = train.numInstances();

    m_Data = new double[nC][nR + 1]; // Data values
    int[] Y = new int[nC]; // Class labels
    double[] xMean = new double[nR + 1]; // Attribute means
    xSD = new double[nR + 1]; // Attribute stddev's
    double[] sY = new double[nK + 1]; // Instance counts per class
    double[] weights = new double[nC]; // Weights of instances
    double totWeights = 0; // Total weight of the instances
    m_Par = new double[nR + 1][nK]; // Optimized parameter values

    if (m_Debug) {
        System.out.println("Extracting data...");
    }

    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        Y[i] = (int) current.classValue(); // Class value starts from 0
        weights[i] = current.weight(); // Dealing with weights
        totWeights += weights[i];

        m_Data[i][0] = 1;
        int j = 1;
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }

        // Class count
        sY[Y[i]]++;
    }

    if ((totWeights <= 1) && (nC > 1))
        throw new Exception("Sum of weights of instances less than 1, please reweight!");

    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        if (totWeights > 1)
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        else
            xSD[j] = 0;
    }

    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++)
            System.out.println(sY[m] + " cases have class " + m);
        System.out.println("\n Variable     Avg       SD    ");
        for (int j = 1; j <= nR; j++)
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4)
                    + Utils.doubleToString(xSD[j], 10, 4));
    }

    // Normalise input data
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }

    if (m_Debug) {
        System.out.println("\nIteration History...");
    }

    double x[] = new double[(nR + 1) * nK];
    double[][] b = new double[2][x.length]; // Boundary constraints, N/A here

    // Initialize
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }

    OptEng opt = new OptEng();
    opt.setDebug(m_Debug);
    opt.setWeights(weights);
    opt.setClassLabels(Y);

    if (m_MaxIts == -1) { // Search until convergence
        x = opt.findArgmin(x, b);
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println(" -------------<Converged>--------------");
    } else {
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) // Not enough iterations, but use the current values
            x = opt.getVarbValues();
    }

    m_LL = -opt.getMinFunction(); // Log-likelihood

    // Don't need data matrix anymore
    m_Data = null;

    // Convert coefficients back to non-normalized attribute units
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            if (xSD[j] != 0) {
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}