Usage examples for the value method of weka.core.Instance
public double value(Attribute att);
From source file:ID3Chi.java
License:Open Source License
/** * Computes class distribution for instance using decision tree. * * @param instance/*from www . j a v a 2 s . c om*/ * the instance for which distribution is to be computed * @return the class distribution for the given instance * @throws NoSupportForMissingValuesException * if instance has missing values */ public double[] distributionForInstance(Instance instance) { if (m_Attribute == null) { return m_Distribution; } else { return m_Successors[(int) instance.value(m_Attribute)].distributionForInstance(instance); } }
From source file:ID3Chi.java
License:Open Source License
/** * Splits a dataset according to the values of a nominal attribute. * * @param data//from w ww .j av a2 s . c o m * the data which is to be split * @param att * the attribute to be used for splitting * @return the sets of instances produced by the split */ private Instances[] splitData(Instances data, Attribute att) { // [att.numValues()] is location for "unknown" values Instances[] subset = new Instances[att.numValues() + 1]; for (int j = 0; j <= att.numValues(); j++) { subset[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); if (inst.isMissing(att)) { subset[att.numValues()].add(inst); } else { subset[(int) inst.value(att)].add(inst); } } for (int i = 0; i < subset.length; i++) { subset[i].compactify(); } return subset; }
From source file:MPCKMeans.java
License:Open Source License
/** Actual KMeans function */ protected void runKMeans() throws Exception { boolean converged = false; m_Iterations = 0;//from www. j a v a 2s. co m m_numBlankIterations = 0; m_Objective = Double.POSITIVE_INFINITY; if (!m_isOfflineMetric) { if (m_useMultipleMetrics) { for (int i = 0; i < m_metrics.length; i++) { m_metrics[i].resetMetric(); m_metricLearners[i].resetLearner(); } } else { m_metric.resetMetric(); m_metricLearner.resetLearner(); } // initialize max CL penalties if (m_ConstraintsHash.size() > 0) { m_maxCLPenalties = calculateMaxCLPenalties(); } } // initialize m_ClusterAssignments for (int i = 0; i < m_NumClusters; i++) { m_ClusterAssignments[i] = -1; } PrintStream fincoh = null; if (m_ConstraintIncoherenceFile != null) { fincoh = new PrintStream(new FileOutputStream(m_ConstraintIncoherenceFile)); } while (!converged) { System.out.println("\n" + m_Iterations + ". Objective function: " + ((float) m_Objective)); m_OldObjective = m_Objective; // E-step int numMovedPoints = findBestAssignments(); m_numBlankIterations = (numMovedPoints == 0) ? 
m_numBlankIterations + 1 : 0; // calculateObjectiveFunction(false); System.out.println((float) m_Objective + " - Objective function after point assignment(CALC)"); System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks) + "\tM=" + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG=" + ((float) m_objRegularizer)); // M-step updateClusterCentroids(); // calculateObjectiveFunction(false); System.out.println((float) m_Objective + " - Objective function after centroid estimation"); System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks) + "\tM=" + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG=" + ((float) m_objRegularizer)); if (m_Trainable == TRAINING_INTERNAL && !m_isOfflineMetric) { updateMetricWeights(); if (m_verbose) { calculateObjectiveFunction(true); System.out.println((float) m_Objective + " - Objective function after metric update"); System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks) + "\tM=" + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG=" + ((float) m_objRegularizer)); } if (m_ConstraintsHash.size() > 0) { m_maxCLPenalties = calculateMaxCLPenalties(); } } if (fincoh != null) { printConstraintIncoherence(fincoh); } converged = convergenceCheck(m_OldObjective, m_Objective); m_Iterations++; } if (fincoh != null) { fincoh.close(); } System.out.println("Converged!"); System.err.print("Its\t" + m_Iterations + "\t"); if (m_verbose) { System.out.println("Done clustering; top cluster features: "); for (int i = 0; i < m_NumClusters; i++) { System.out.println("Centroid " + i); TreeMap map = new TreeMap(Collections.reverseOrder()); Instance centroid = m_ClusterCentroids.instance(i); for (int j = 0; j < centroid.numValues(); j++) { Attribute attr = centroid.attributeSparse(j); map.put(new Double(centroid.value(attr)), attr.name()); } Iterator it = map.entrySet().iterator(); for (int j 
= 0; j < 5 && it.hasNext(); j++) { Map.Entry entry = (Map.Entry) it.next(); System.out.println("\t" + entry.getKey() + "\t" + entry.getValue()); } } } }
From source file:MPCKMeans.java
License:Open Source License
/** Go through the cannot-link constraints and find the current maximum distance * @return an array of maximum weighted distances. If a single metric is used, maximum distance * is calculated over the entire dataset */ // TODO: non-datasetWide case is not debugged currently!!! protected double[] calculateMaxCLPenalties() throws Exception { double[] maxPenalties = null; double[][] minValues = null; double[][] maxValues = null; int[] attrIdxs = null; maxPenalties = new double[m_NumClusters]; m_maxCLPoints = new Instance[m_NumClusters][2]; m_maxCLDiffInstances = new Instance[m_NumClusters]; for (int i = 0; i < m_NumClusters; i++) { m_maxCLPoints[i][0] = new Instance(m_Instances.numAttributes()); m_maxCLPoints[i][1] = new Instance(m_Instances.numAttributes()); m_maxCLPoints[i][0].setDataset(m_Instances); m_maxCLPoints[i][1].setDataset(m_Instances); m_maxCLDiffInstances[i] = new Instance(m_Instances.numAttributes()); m_maxCLDiffInstances[i].setDataset(m_Instances); }//from ww w .j a v a 2 s. c o m // TEMPORARY PLUG: this was supposed to take care of WeightedDotp, // but it turns out that with weighting similarity can be > 1. // if (m_metric.m_fixedMaxDistance) { // for (int i = 0; i < m_NumClusters; i++) { // maxPenalties[i] = m_metric.getMaxDistance(); // } // return maxPenalties; // } minValues = new double[m_NumClusters][m_metrics[0].getNumAttributes()]; maxValues = new double[m_NumClusters][m_metrics[0].getNumAttributes()]; attrIdxs = m_metrics[0].getAttrIndxs(); // temporary plug: if this if the first iteration when no instances were assigned to clusters, // dataset-wide (not cluster-wide!) 
minimum and maximum are used even for the case with // multiple metrics boolean datasetWide = true; if (m_useMultipleMetrics && m_Iterations > 0) { datasetWide = false; } // TODO: Mahalanobis - check with getMaxPoints // go through all points if (m_metric instanceof WeightedMahalanobis) { if (m_useMultipleMetrics) { for (int i = 0; i < m_metrics.length; i++) { double[][] maxPoints = ((WeightedMahalanobis) m_metrics[i]).getMaxPoints(m_ConstraintsHash, m_Instances); minValues[i] = maxPoints[0]; maxValues[i] = maxPoints[1]; // System.out.println("Max points " + i); // for (int j = 0; j < maxPoints[0].length; j++) { System.out.println(maxPoints[0][j] + " - " + maxPoints[1][j]);} } } else { double[][] maxPoints = ((WeightedMahalanobis) m_metric).getMaxPoints(m_ConstraintsHash, m_Instances); minValues[0] = maxPoints[0]; maxValues[0] = maxPoints[1]; for (int i = 0; i < m_metrics.length; i++) { minValues[i] = maxPoints[0]; maxValues[i] = maxPoints[1]; } // System.out.println("Max points:"); // for (int i = 0; i < maxPoints[0].length; i++) { System.out.println(maxPoints[0][i] + " - " + maxPoints[1][i]);} } } else { // find the enclosing hypercube for WeightedEuclidean etc. 
for (int i = 0; i < m_Instances.numInstances(); i++) { Instance instance = m_Instances.instance(i); for (int j = 0; j < attrIdxs.length; j++) { double val = instance.value(attrIdxs[j]); if (datasetWide) { if (val < minValues[0][j]) { minValues[0][j] = val; } if (val > maxValues[0][j]) { maxValues[0][j] = val; } } else { // cluster-specific min's and max's are needed if (val < minValues[m_ClusterAssignments[i]][j]) { minValues[m_ClusterAssignments[i]][j] = val; } if (val > maxValues[m_ClusterAssignments[i]][j]) { maxValues[m_ClusterAssignments[i]][j] = val; } } } } } // get the max/min points if (datasetWide) { for (int i = 0; i < attrIdxs.length; i++) { m_maxCLPoints[0][0].setValue(attrIdxs[i], minValues[0][i]); m_maxCLPoints[0][1].setValue(attrIdxs[i], maxValues[0][i]); } // must copy these over all clusters - just for the first iteration for (int j = 1; j < m_NumClusters; j++) { for (int i = 0; i < attrIdxs.length; i++) { m_maxCLPoints[j][0].setValue(attrIdxs[i], minValues[0][i]); m_maxCLPoints[j][1].setValue(attrIdxs[i], maxValues[0][i]); } } } else { // cluster-specific for (int j = 0; j < m_NumClusters; j++) { for (int i = 0; i < attrIdxs.length; i++) { m_maxCLPoints[j][0].setValue(attrIdxs[i], minValues[j][i]); m_maxCLPoints[j][1].setValue(attrIdxs[i], maxValues[j][i]); } } } // calculate the distances if (datasetWide) { maxPenalties[0] = m_metrics[0].penaltySymmetric(m_maxCLPoints[0][0], m_maxCLPoints[0][1]); m_maxCLDiffInstances[0] = m_metrics[0].createDiffInstance(m_maxCLPoints[0][0], m_maxCLPoints[0][1]); for (int i = 1; i < maxPenalties.length; i++) { maxPenalties[i] = maxPenalties[0]; m_maxCLDiffInstances[i] = m_maxCLDiffInstances[0]; } } else { // cluster-specific - SHOULD BE FIXED!!!! 
for (int j = 0; j < m_NumClusters; j++) { for (int i = 0; i < attrIdxs.length; i++) { maxPenalties[j] += m_metrics[j].penaltySymmetric(m_maxCLPoints[j][0], m_maxCLPoints[j][1]); m_maxCLDiffInstances[j] = m_metrics[0].createDiffInstance(m_maxCLPoints[j][0], m_maxCLPoints[j][1]); } } } System.out.println("Recomputed max CL penalties"); return maxPenalties; }
From source file:HierarchicalClusterer.java
License:Open Source License
/** calculate the distance between two clusters * @param cluster1 list of indices of instances in the first cluster * @param cluster2 dito for second cluster * @return distance between clusters based on link type *//* ww w .j ava 2s . com*/ double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) { double fBestDist = Double.MAX_VALUE; switch (m_nLinkType) { case SINGLE: // find single link distance aka minimum link, which is the closest distance between // any item in cluster1 and any item in cluster2 fBestDist = Double.MAX_VALUE; for (int i = 0; i < cluster1.size(); i++) { int i1 = cluster1.elementAt(i); for (int j = 0; j < cluster2.size(); j++) { int i2 = cluster2.elementAt(j); double fDist = fDistance[i1][i2]; if (fBestDist > fDist) { fBestDist = fDist; } } } break; case COMPLETE: case ADJCOMLPETE: // find complete link distance aka maximum link, which is the largest distance between // any item in cluster1 and any item in cluster2 fBestDist = 0; for (int i = 0; i < cluster1.size(); i++) { int i1 = cluster1.elementAt(i); for (int j = 0; j < cluster2.size(); j++) { int i2 = cluster2.elementAt(j); double fDist = fDistance[i1][i2]; if (fBestDist < fDist) { fBestDist = fDist; } } } if (m_nLinkType == COMPLETE) { break; } // calculate adjustment, which is the largest within cluster distance double fMaxDist = 0; for (int i = 0; i < cluster1.size(); i++) { int i1 = cluster1.elementAt(i); for (int j = i + 1; j < cluster1.size(); j++) { int i2 = cluster1.elementAt(j); double fDist = fDistance[i1][i2]; if (fMaxDist < fDist) { fMaxDist = fDist; } } } for (int i = 0; i < cluster2.size(); i++) { int i1 = cluster2.elementAt(i); for (int j = i + 1; j < cluster2.size(); j++) { int i2 = cluster2.elementAt(j); double fDist = fDistance[i1][i2]; if (fMaxDist < fDist) { fMaxDist = fDist; } } } fBestDist -= fMaxDist; break; case AVERAGE: // finds average distance between the elements of the two clusters fBestDist = 0; for (int i = 0; i < 
cluster1.size(); i++) { int i1 = cluster1.elementAt(i); for (int j = 0; j < cluster2.size(); j++) { int i2 = cluster2.elementAt(j); fBestDist += fDistance[i1][i2]; } } fBestDist /= (cluster1.size() * cluster2.size()); break; case MEAN: { // calculates the mean distance of a merged cluster (akak Group-average agglomerative clustering) Vector<Integer> merged = new Vector<Integer>(); merged.addAll(cluster1); merged.addAll(cluster2); fBestDist = 0; for (int i = 0; i < merged.size(); i++) { int i1 = merged.elementAt(i); for (int j = i + 1; j < merged.size(); j++) { int i2 = merged.elementAt(j); fBestDist += fDistance[i1][i2]; } } int n = merged.size(); fBestDist /= (n * (n - 1.0) / 2.0); } break; case CENTROID: // finds the distance of the centroids of the clusters double[] fValues1 = new double[m_instances.numAttributes()]; for (int i = 0; i < cluster1.size(); i++) { Instance instance = m_instances.instance(cluster1.elementAt(i)); for (int j = 0; j < m_instances.numAttributes(); j++) { fValues1[j] += instance.value(j); } } double[] fValues2 = new double[m_instances.numAttributes()]; for (int i = 0; i < cluster2.size(); i++) { Instance instance = m_instances.instance(cluster2.elementAt(i)); for (int j = 0; j < m_instances.numAttributes(); j++) { fValues2[j] += instance.value(j); } } for (int j = 0; j < m_instances.numAttributes(); j++) { fValues1[j] /= cluster1.size(); fValues2[j] /= cluster2.size(); } // set up two instances for distance function Instance instance1 = (Instance) m_instances.instance(0).copy(); Instance instance2 = (Instance) m_instances.instance(0).copy(); for (int j = 0; j < m_instances.numAttributes(); j++) { instance1.setValue(j, fValues1[j]); instance2.setValue(j, fValues2[j]); } fBestDist = m_DistanceFunction.distance(instance1, instance2); break; case WARD: { // finds the distance of the change in caused by merging the cluster. 
// The information of a cluster is calculated as the error sum of squares of the // centroids of the cluster and its members. double ESS1 = calcESS(cluster1); double ESS2 = calcESS(cluster2); Vector<Integer> merged = new Vector<Integer>(); merged.addAll(cluster1); merged.addAll(cluster2); double ESS = calcESS(merged); fBestDist = ESS * merged.size() - ESS1 * cluster1.size() - ESS2 * cluster2.size(); } break; } return fBestDist; }
From source file:HierarchicalClusterer.java
License:Open Source License
/** calculated error sum-of-squares for instances wrt centroid **/ double calcESS(Vector<Integer> cluster) { double[] fValues1 = new double[m_instances.numAttributes()]; for (int i = 0; i < cluster.size(); i++) { Instance instance = m_instances.instance(cluster.elementAt(i)); for (int j = 0; j < m_instances.numAttributes(); j++) { fValues1[j] += instance.value(j); }//from www .j ava 2s . com } for (int j = 0; j < m_instances.numAttributes(); j++) { fValues1[j] /= cluster.size(); } // set up two instances for distance function Instance centroid = (Instance) m_instances.instance(cluster.elementAt(0)).copy(); for (int j = 0; j < m_instances.numAttributes(); j++) { centroid.setValue(j, fValues1[j]); } double fESS = 0; for (int i = 0; i < cluster.size(); i++) { Instance instance = m_instances.instance(cluster.elementAt(i)); fESS += m_DistanceFunction.distance(centroid, instance); } return fESS / cluster.size(); }
From source file:adams.data.conversion.AbstractMatchWekaInstanceAgainstHeader.java
License:Open Source License
/** * Matches the input instance against the header. * * @param input the Instance to align to the header * @return the aligned Instance/*from w ww .ja v a2s . c o m*/ */ protected Instance match(Instance input) { Instance result; double[] values; int i; values = new double[m_Dataset.numAttributes()]; for (i = 0; i < m_Dataset.numAttributes(); i++) { values[i] = Utils.missingValue(); switch (m_Dataset.attribute(i).type()) { case Attribute.NUMERIC: case Attribute.DATE: values[i] = input.value(i); break; case Attribute.NOMINAL: if (m_Dataset.attribute(i).indexOfValue(input.stringValue(i)) != -1) values[i] = m_Dataset.attribute(i).indexOfValue(input.stringValue(i)); break; case Attribute.STRING: values[i] = m_Dataset.attribute(i).addStringValue(input.stringValue(i)); break; case Attribute.RELATIONAL: values[i] = m_Dataset.attribute(i).addRelation(input.relationalValue(i)); break; default: throw new IllegalStateException( "Unhandled attribute type: " + Attribute.typeToString(m_Dataset.attribute(i).type())); } } if (input instanceof SparseInstance) result = new SparseInstance(input.weight(), values); else result = new DenseInstance(input.weight(), values); result.setDataset(m_Dataset); // fix class index, if necessary if ((input.classIndex() != m_Dataset.classIndex()) && (m_Dataset.classIndex() < 0)) m_Dataset.setClassIndex(input.classIndex()); return result; }
From source file:adams.data.conversion.WekaInstancesToTimeseries.java
License:Open Source License
/** * Performs the actual conversion./*from w ww .j a v a2 s . co m*/ * * @return the converted data * @throws Exception if something goes wrong with the conversion */ @Override protected Object doConvert() throws Exception { Timeseries result; Instances input; Instance inst; int indexDate; int indexValue; TimeseriesPoint point; int i; Date timestamp; double value; input = (Instances) m_Input; // determine attribute indices m_DateAttribute.setData(input); indexDate = m_DateAttribute.getIntIndex(); if (indexDate == -1) throw new IllegalStateException("Failed to located date attribute: " + m_DateAttribute.getIndex()); m_ValueAttribute.setData(input); indexValue = m_ValueAttribute.getIntIndex(); if (indexValue == -1) throw new IllegalStateException("Failed to located value attribute: " + m_ValueAttribute.getIndex()); result = new Timeseries(input.relationName() + "-" + input.attribute(indexValue).name()); for (i = 0; i < input.numInstances(); i++) { inst = input.instance(i); if (!inst.isMissing(indexDate) && !inst.isMissing(indexValue)) { timestamp = new Date((long) inst.value(indexDate)); value = inst.value(indexValue); point = new TimeseriesPoint(timestamp, value); result.add(point); } } return result; }
From source file:adams.data.instances.InstanceComparator.java
License:Open Source License
/** * Compares its two arguments for order. Returns a negative integer, * zero, or a positive integer as the first argument is less than, equal * to, or greater than the second./* w w w . j a v a 2 s . com*/ * * @param o1 the first object to be compared. * @param o2 the second object to be compared. * @return a negative integer, zero, or a positive integer as the * first argument is less than, equal to, or greater than the * second. */ @Override public int compare(Instance o1, Instance o2) { int result; Instances header; int i; int weight; double d1; double d2; result = 0; header = o1.dataset(); i = 0; while ((result == 0) && (i < m_Indices.length)) { if (o1.isMissing(m_Indices[i]) && o2.isMissing(m_Indices[i])) result = 0; else if (o1.isMissing(m_Indices[i])) result = -1; else if (o2.isMissing(m_Indices[i])) result = +1; else if (header.attribute(m_Indices[i]).isNumeric()) { d1 = o1.value(m_Indices[i]); d2 = o2.value(m_Indices[i]); if (d1 < d2) result = -1; else if (d1 == d2) result = 0; else result = +1; } else { result = o1.stringValue(m_Indices[i]).compareTo(o2.stringValue(m_Indices[i])); } if (!m_Ascending[i]) result = -result; // add weight to index weight = (int) Math.pow(10, (m_Indices.length - i)); result *= weight; i++; } return result; }
From source file:adams.flow.sink.WekaInstanceViewer.java
License:Open Source License
/** * Displays the token (the panel and dialog have already been created at * this stage)./*w w w. ja v a 2 s . c o m*/ * * @param token the token to display */ @Override protected void display(Token token) { InstanceContainerManager manager; InstanceContainer cont; weka.core.Instance winst; weka.core.Attribute att; String id; adams.data.instance.Instance inst; if (token.getPayload() instanceof weka.core.Instance) { winst = (weka.core.Instance) token.getPayload(); inst = new adams.data.instance.Instance(); inst.set(winst); if (!m_ID.isEmpty()) { att = winst.dataset().attribute(m_ID); if (att != null) { if (att.isNominal() || att.isString()) id = winst.stringValue(att.index()); else id = "" + winst.value(att.index()); inst.setID(id); } } } else { inst = (adams.data.instance.Instance) token.getPayload(); if (inst.hasReport() && inst.getReport().hasValue(m_ID)) inst.setID("" + inst.getReport().getValue(new Field(m_ID, DataType.UNKNOWN))); } manager = m_InstancePanel.getContainerManager(); cont = manager.newContainer(inst); manager.startUpdate(); manager.add(cont); m_Updater.update(m_InstancePanel, cont); }