List of usage examples for the weka.core.Instance method attribute(int)
public Attribute attribute(int index);
From source file:knn.KNNClassifier.java
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) { // set dist to 0 double distance = 0; // from index 0 to left and right side's number of attributes - 1... for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { // if left and right side's attributes are numeric, set the distance equal // to the value of left value - right value all squared if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2); } else {/*from w w w. j a v a 2 s.c o m*/ // else add 5 to the distance unless left and right side's string converted // values are equal to one another, in which case set dist back to zero if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance += 0; } distance += 5; } } return distance; }
From source file:knn.KNNClassifier.java
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) { // set dist to 0 double distance = 0; // from index 0 to number of attributes - 1 on both sides... for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { // if left and right side's attributes are numbers set distance equal // to absolute value of left's value - right's value if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += abs(instanceLHS.value(i) - instanceRHS.value(i)); } else {//from w ww. j a v a 2 s.c o m // else add 5 to distance unless left and right are equal, in which // case set the distance back to 0 if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance = 0; } distance += 5; } } return distance; }
From source file:knnclassifier.KNNClassifier.java
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) { double distance = 0; for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2); } else {//from ww w . j a v a 2 s . c o m if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance += 0; } distance += 5; } } return distance; }
From source file:knnclassifier.KNNClassifier.java
/**
 * Computes an absolute-difference (Manhattan-style) distance between two instances.
 *
 * <p>Only attribute positions present in both instances are compared, and the last
 * attribute of each instance is skipped. When both attributes at a position are numeric,
 * the absolute difference of the two values is added. Otherwise a fixed penalty is added
 * if the two string values differ, and nothing is added if they are equal.
 *
 * @param instanceLHS the left-hand instance
 * @param instanceRHS the right-hand instance
 * @return the accumulated distance
 */
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) {
    // Amount added for every non-numeric attribute whose two values differ.
    final double mismatchPenalty = 5;

    double distance = 0;

    // The "- 1" bounds leave out the last attribute of each instance
    // (presumably the class attribute -- TODO confirm against the caller).
    for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) {
        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            distance += abs(instanceLHS.value(i) - instanceRHS.value(i));
        } else if (!instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
            // Equal values contribute nothing. The earlier code reset the whole
            // running total to 0 on a match and then still added the penalty,
            // discarding the distance accumulated from preceding attributes.
            distance += mismatchPenalty;
        }
    }

    return distance;
}
From source file:lattice.Lattice.java
License:Open Source License
/** * Constructor of a lattice over the given variables of the dataset. * /*from w w w. java2s . co m*/ * @param dataset */ public Lattice(Instances dataset) { // ~ initialise internal structure for counting (TID sets) this.nbInstances = dataset.numInstances(); this.nbVariables = dataset.numAttributes(); BitSet[][] presence = new BitSet[nbVariables][]; TreeSet<Integer> allAttributesNumbers = new TreeSet<Integer>(); int[] nbValuesForAttribute = new int[nbVariables]; for (int a = 0; a < nbVariables; a++) { nbValuesForAttribute[a] = dataset.numDistinctValues(a) + 1; //+1 for missing presence[a] = new BitSet[nbValuesForAttribute[a]]; allAttributesNumbers.add(a); for (int v = 0; v < presence[a].length; v++) { presence[a][v] = new BitSet(); } } for (int i = 0; i < nbInstances; i++) { Instance row = dataset.instance(i); for (int a = 0; a < nbVariables; a++) { int indexOfValue; if (row.isMissing(a)) { // indexOfValue = (int) dataset.meanOrMode(a); indexOfValue = dataset.numDistinctValues(a); //missing at the end } else { String value = row.stringValue(a); indexOfValue = row.attribute(a).indexOfValue(value); } presence[a][indexOfValue].set(i); } } // initialise the first nodes of the lattice (i.e., the ones // corresponding to single variables this.all = new LatticeNode(this, nbValuesForAttribute); this.singleNodes = new LatticeNode[nbVariables]; for (int a = 0; a < nbVariables; a++) { int[] variablesNumbers = { a }; LatticeNode node = new LatticeNode(this, variablesNumbers, nbValuesForAttribute, presence[a], all); singleNodes[a] = node; } }
From source file:lattice.Lattice.java
License:Open Source License
public Lattice(Instances structure, ArffReader loader) throws IOException { // ~ initialise internal structure for counting (TID sets) this.nbInstances = 0; this.nbVariables = structure.numAttributes(); BitSet[][] presence = new BitSet[nbVariables][]; TreeSet<Integer> allAttributesNumbers = new TreeSet<Integer>(); int[] nbValuesForAttribute = new int[nbVariables]; for (int a = 0; a < nbVariables; a++) { nbValuesForAttribute[a] = structure.numDistinctValues(a) + 1;//+1 for missing presence[a] = new BitSet[nbValuesForAttribute[a]]; allAttributesNumbers.add(a);/*w ww . j av a2s. c o m*/ for (int v = 0; v < presence[a].length; v++) { presence[a][v] = new BitSet(); } } Instance row; while ((row = loader.readInstance(structure)) != null) { for (int a = 0; a < nbVariables; a++) { int indexOfValue; if (row.isMissing(a)) { indexOfValue = structure.numDistinctValues(a);//missing at the end } else { String value = row.stringValue(a); indexOfValue = row.attribute(a).indexOfValue(value); } presence[a][indexOfValue].set(this.nbInstances); } this.nbInstances++; } // initialise the first nodes of the lattice (i.e., the ones // corresponding to single variables this.all = new LatticeNode(this, nbValuesForAttribute); this.singleNodes = new LatticeNode[nbVariables]; for (int a = 0; a < nbVariables; a++) { int[] variablesNumbers = { a }; LatticeNode node = new LatticeNode(this, variablesNumbers, nbValuesForAttribute, presence[a], all); singleNodes[a] = node; } }
From source file:lu.lippmann.cdb.common.gui.dataset.InstanceFormatter.java
License:Open Source License
/**
 * Renders an instance as an HTML fragment with one {@code name = <b>value</b><br/>}
 * entry per attribute.
 *
 * <p>Attribute names and nominal/string values are HTML-escaped. Date attributes are
 * formatted with {@code FormatterUtil.DATE_FORMAT}; remaining numeric attributes are
 * appended as their double value. Attributes of any other kind produce an empty bold
 * element.
 *
 * @param inst           the instance to render
 * @param withHTMLHeader whether to wrap the output in {@code <html><body>} ...
 *                       {@code </body></html>}
 * @return the HTML text
 */
public static String htmlFormat(final Instance inst, final boolean withHTMLHeader) {
    final StringBuilder html = new StringBuilder();

    if (withHTMLHeader) {
        html.append("<html><body>");
    }

    for (int idx = 0; idx < inst.numAttributes(); idx++) {
        final String escapedName = StringEscapeUtils.escapeHtml(inst.attribute(idx).name());
        html.append(escapedName).append(" = ").append("<b>");

        // The order of these checks is kept as-is: the date test runs before the
        // generic numeric test.
        if (inst.attribute(idx).isNominal() || inst.attribute(idx).isString()) {
            final String escapedValue = StringEscapeUtils.escapeHtml(inst.stringValue(idx));
            html.append(escapedValue);
        } else if (inst.attribute(idx).isDate()) {
            final Calendar calendar = Calendar.getInstance();
            calendar.setTimeInMillis((long) inst.value(idx));
            html.append(FormatterUtil.DATE_FORMAT.format(calendar.getTime()));
        } else if (inst.attribute(idx).isNumeric()) {
            html.append(inst.value(idx));
        }

        html.append("</b>").append("<br/>");
    }

    if (withHTMLHeader) {
        html.append("</body></html>");
    }

    return html.toString();
}
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
/** * /*www . j a v a2s . c o m*/ * @return * @throws Exception */ private void computeMatrixMDF() throws Exception { //Build filtered instance for each element of the base int baseSize = base.size(); Attribute baseAttribute = instances.attribute(baseIndex); //Create baseSize copy of instances for filtering List<Instances> filteredInstances = new ArrayList<Instances>(); for (int i = 0; i < baseSize; i++) { filteredInstances.add(new Instances(instances)); } //Filter for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); for (final TupleSI j : base) { final int wekaAttributeValue = (int) instance.value(baseIndex); if (!new TupleSI(baseAttribute.value(wekaAttributeValue), baseIndex).equals(j)) { WekaUtil2.removeFromInstances(filteredInstances.get(base.indexOf(j)), instance); } } } //Compute I vector final int idxsNs = idxsN.size(); if (idxsNs == 0) { throw new Exception("You need at least one numerical attribute !!"); } int minIndexForI = -1; double minValueForI = Double.MAX_VALUE; final double[][] meanBase = new double[idxsNs][baseSize]; int p = 0; for (final Integer num : idxsN) { double Ip = 0.0; for (int j = 0; j < baseSize; j++) { final List<Instance> filtredInstance = filteredInstances.get(j); final int fs = filtredInstance.size(); double mean = 0; for (int l = 0; l < fs; l++) { mean += filtredInstance.get(l).value(num); } mean = mean / fs; meanBase[p][j] = mean; for (int l = 0; l < fs; l++) { Ip += Math.pow(filtredInstance.get(l).value(num) - mean, 2); } } if (Ip < minValueForI) { minValueForI = Ip; minIndexForI = p; } p++; } this.maxNum = new double[idxsNs]; for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); //Save maximum value for each numerical attribute for (Integer n1 : idxsN) { double val = instance.value(n1); int idx = idxsN.indexOf(n1); if (val > maxNum[idx]) { maxNum[idx] = val; } } //Compute matrix M for each categorical attribute for (final Integer e1 : 
idxsC) { for (final Integer e2 : idxsC) { final int i1 = getIndexOf(e1, instance.attribute(e1).value((int) instance.value(e1))); final int j1 = getIndexOf(e2, instance.attribute(e2).value((int) instance.value(e2))); M[i1][j1] = M[i1][j1] + 1; } } } //Compute D matrix for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { double d = M[i][j] / (M[i][i] + M[j][j] - M[i][j] + 0.0); if (d >= theta) { D[i][j] = d; } else { D[i][j] = 0; } } } //Compute F matrix for base for (final TupleSI baseVal : base) { F.put(baseVal, meanBase[minIndexForI][base.indexOf(baseVal)]); } //Compute F matrix for noBase for (final TupleSI noBaseVal : noBase) { double f = 0.0; for (final TupleSI baseVal : base) { f += D[getIndexOf(noBaseVal)][getIndexOf(baseVal)] * F.get(baseVal); } F.put(noBaseVal, f); } }
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
/** * /*from w w w. jav a 2s .com*/ * @return */ public Instances getModifiedInstances() { //Copy attribute list (and change categorical by numerical) final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>(); for (int i = 0; i < instances.numAttributes(); i++) { Attribute attr = instances.attribute(i); if (attr.isNumeric() || attr.index() == instances.classIndex()) { lAttrs.add(attr); } else { Attribute newAttr = new Attribute(attr.name()); lAttrs.add(newAttr); } } //Build new instance final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances()); newInstances.setClassIndex(instances.classIndex()); for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); final Instance cpyInstance = (Instance) instance.copy(); for (int j = 0; j < instance.numAttributes(); j++) { Attribute attribute = instance.attribute(j); int k = 0; if (attribute.index() == instances.classIndex()) { //The class index is nominal cpyInstance.setValue(attribute, instance.stringValue(j)); } else if (!attribute.isNumeric()) { String elt = attribute.value((int) instance.value(j)); cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j))); } else { if (maxNum[k] > 1) { cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]); } k++; } } newInstances.add(cpyInstance); } if (ignoreClass && instances.classIndex() != -1) { newInstances.deleteAttributeAt(instances.classIndex()); } return newInstances; }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/** * Generate the centroid coordinates based * on it's members (objects assigned to the cluster of the centroid) and the distance * function being used.// w w w . j a v a2 s. co m * @return the centroid */ public static MixedCentroid computeMixedCentroid(final boolean preserveOrder, final NormalizableDistance distanceFunction, final Instances numericInstances, final Instances originalInstances, final int clusterIndex) { final int numInstances = numericInstances.numInstances(); final int numAttributes = numericInstances.numAttributes(); final Map<TupleSI, Integer> addedAttr = new HashMap<TupleSI, Integer>(); if (numInstances == 1) { Instance uniqueNumInstance = numericInstances.firstInstance(); Instance uniqueMixInstance = originalInstances.firstInstance(); double[] centroid = uniqueNumInstance.toDoubleArray(); for (int i = 0; i < uniqueMixInstance.numAttributes(); i++) { if (!uniqueMixInstance.attribute(i).isNumeric()) { final String catVal = uniqueMixInstance.attribute(i).value((int) uniqueMixInstance.value(i)); addedAttr.put(new TupleSI(catVal, i), 1); } } return new MixedCentroid(clusterIndex, centroid, addedAttr); } final double[] vals = new double[numAttributes]; //used only for Manhattan Distance Instances sortedMembers = null; int middle = 0; boolean dataIsEven = false; final boolean isManhattanDist = (distanceFunction instanceof ManhattanDistance); final boolean isEuclideanDist = (distanceFunction instanceof EuclideanDistance); if (isManhattanDist) { middle = (numInstances - 1) / 2; dataIsEven = ((numInstances % 2) == 0); if (preserveOrder) { sortedMembers = numericInstances; } else { sortedMembers = new Instances(numericInstances); } } for (int j = 0; j < numAttributes; j++) { //in case of Euclidian distance the centroid is the mean point //in case of Manhattan distance the centroid is the median point //in both cases, if the attribute is nominal, the centroid is the mode if (isEuclideanDist) { vals[j] = numericInstances.meanOrMode(j); for (int i = 0; i < 
numInstances; i++) { if (!originalInstances.attribute(j).isNumeric()) { final Instance instance = originalInstances.instance(i); final String catVal = instance.attribute(j).value((int) instance.value(j)); //Initialize map final TupleSI key = new TupleSI(catVal, j); if (!addedAttr.containsKey(key)) addedAttr.put(key, 0); addedAttr.put(key, addedAttr.get(key) + 1); } } } else if (isManhattanDist) { sortedMembers.kthSmallestValue(j, middle + 1); vals[j] = sortedMembers.instance(middle).value(j); if (dataIsEven) { sortedMembers.kthSmallestValue(j, middle + 2); vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2; } } else { throw new IllegalStateException("Not handled distance ..."); } } return new MixedCentroid(clusterIndex, vals, addedAttr); }