List of usage examples for weka.core Instances instance
public Instance instance(int index)
From source file:meka.core.MLUtils.java
License:Open Source License
/**
 * GetYfromD - Extract the labels of Instances D as a double matrix Y[][].
 *
 * <p>The first {@code D.classIndex()} attributes of each instance are read as its labels;
 * row {@code i} of the result holds those values for the {@code i}-th instance of D.
 * TODO: getYfromInstances would be a better name.
 *
 * @param D dataset; its class index is used as the number of label columns
 * @return a matrix with {@code D.numInstances()} rows and {@code D.classIndex()} columns
 */
public static double[][] getYfromD(Instances D) {
    final int numLabels = D.classIndex();
    final int numRows = D.numInstances();
    double[][] labels = new double[numRows][numLabels];
    for (int row = 0; row < numRows; row++) {
        double[] rowValues = labels[row];
        for (int col = 0; col < numLabels; col++) {
            rowValues[col] = D.instance(row).value(col);
        }
    }
    return labels;
}
From source file:meka.core.MLUtils.java
License:Open Source License
/** * InsertZintoD - Insert data Z[][] to Instances D (e.g., as labels). * NOTE: Assumes binary labels!/*from w w w.j a va2 s . c o m*/ * @see #addZtoD(Instances, double[][], int) */ private static Instances insertZintoD(Instances D, double Z[][]) { int L = Z[0].length; // add attributes for (int j = 0; j < L; j++) { D.insertAttributeAt(new Attribute("c" + j, Arrays.asList(new String[] { "0", "1" })), j); } // add values Z[0]...Z[N] to D // (note that if D.numInstances() < Z.length, only some are added) for (int j = 0; j < L; j++) { for (int i = 0; i < D.numInstances(); i++) { D.instance(i).setValue(j, Z[i][j] > 0.5 ? 1.0 : 0.0); } } D.setClassIndex(L); return D; }
From source file:meka.core.MLUtils.java
License:Open Source License
/** * AddZtoD - Add attribute space Z[N][H] (N rows of H columns) to Instances D, which should have N rows also. * @param D dataset (of N instances) * @param Z attribute space (of N rows, H columns) * @param L column to add Z from in D *//* w w w. j av a2 s .c om*/ private static Instances addZtoD(Instances D, double Z[][], int L) { int H = Z[0].length; int N = D.numInstances(); // add attributes for (int a = 0; a < H; a++) { D.insertAttributeAt(new Attribute("A" + a), L + a); } // add values Z[0]...Z[N] to D for (int a = 0; a < H; a++) { for (int i = 0; i < N; i++) { D.instance(i).setValue(L + a, Z[i][a]); } } D.setClassIndex(L); return D; }
From source file:meka.core.PSUtils.java
License:Open Source License
/**
 * CountCombinationsSparseSubset - like CountCombinationsSparse, but only interested in
 * 'indices[]' wrt 'D'.
 *
 * @param D       dataset
 * @param indices indices we are interested in
 * @return a HashMap where a LabelSet representation of each label combination is
 *         associated with an Integer count, e.g., [3,7,14],3
 */
public static HashMap<LabelSet, Integer> countCombinationsSparseSubset(Instances D, int indices[]) {
    HashMap<LabelSet, Integer> counts = new HashMap<LabelSet, Integer>();
    final int numRows = D.numInstances();
    for (int row = 0; row < numRows; row++) {
        LabelSet combination = new LabelSet(MLUtils.toSubIndicesSet(D.instance(row), indices));
        Integer seen = counts.get(combination);
        if (seen == null) {
            // first occurrence of this combination
            counts.put(combination, 1);
        } else {
            counts.put(combination, seen + 1);
        }
    }
    return counts;
}
From source file:meka.core.PSUtils.java
License:Open Source License
/** * CountCombinationsSparse - return a mapping of each distinct label combination and its count. * @param D dataset /*from w w w .j av a2 s.c om*/ * @param L number of labels * @return a HashMap where a LabelSet representation of each label combination is associated with an Integer count, e.g., [3,7,14],3 */ public static final HashMap<LabelSet, Integer> countCombinationsSparse(Instances D, int L) { HashMap<LabelSet, Integer> map = new HashMap<LabelSet, Integer>(); for (int i = 0; i < D.numInstances(); i++) { LabelSet y = new LabelSet(MLUtils.toSparseIntArray(D.instance(i), L)); Integer c = map.get(y); map.put(y, c == null ? 1 : c + 1); } return map; }
From source file:meka.core.PSUtils.java
License:Open Source License
/** * Transform instances into a multi-class representation. * @param D original dataset/*w ww . jav a 2 s . co m*/ * @param L number of labels in the original dataset * @param cname class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods) * @param p pruning value * @param n restoration value * @return transformed dataset */ public static Instances PSTransformation(Instances D, int L, String cname, int p, int n) { D = new Instances(D); // Gather combinations HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L); // Prune combinations if (p > 0) MLUtils.pruneCountHashMap(distinctCombinations, p); // Check there are > 2 if (distinctCombinations.size() <= 1 && p > 0) { // ... or try again if not ... System.err.println("[Warning] You did too much pruning, setting P = P-1"); return PSTransformation(D, L, cname, p - 1, n); } // Create class attribute ArrayList<String> ClassValues = new ArrayList<String>(); for (LabelSet y : distinctCombinations.keySet()) ClassValues.add(y.toString()); Attribute C = new Attribute(cname, ClassValues); // Insert new special attribute (which has all possible combinations of labels) D.insertAttributeAt(C, L); D.setClassIndex(L); //Add class values int N = D.numInstances(); for (int i = 0; i < N; i++) { Instance x = D.instance(i); LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L)); String y_string = y.toString(); // add it if (ClassValues.contains(y_string)) //if its class value exists x.setClassValue(y_string); // decomp else if (n > 0) { //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n); LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n); //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations); if (d_subsets.length > 0) { // fast x.setClassValue(d_subsets[0].toString()); // additional if (d_subsets.length > 1) { for (int s_i = 1; s_i < d_subsets.length; s_i++) { Instance x_ = (Instance) (x).copy(); 
x_.setClassValue(d_subsets[s_i].toString()); D.add(x_); } } } else { x.setClassMissing(); } } } // remove with missing class D.deleteWithMissingClass(); try { D = F.removeLabels(D, L); } catch (Exception e) { // should never happen } D.setClassIndex(0); return D; }
From source file:meka.core.PSUtils.java
License:Open Source License
/** * Transform instances into a multi-class representation. * @param D original dataset//w w w. j av a 2 s . co m * @param L number of labels in that dataset * @param cname class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods) * @param p pruning value * @param n restoration value * @return transformed dataset */ public static Instances SLTransformation(Instances D, int L, String cname, int p, int n) { D = new Instances(D); // Gather combinations HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L); // Prune combinations if (p > 0) MLUtils.pruneCountHashMap(distinctCombinations, p); // Check there are > 2 if (distinctCombinations.size() <= 1 && p > 0) { // ... or try again if not ... System.err.println("[Warning] You did too much pruning, setting P = P-1"); return PSTransformation(D, L, cname, p - 1, n); } // Create class attribute ArrayList<String> ClassValues = new ArrayList<String>(); for (LabelSet y : distinctCombinations.keySet()) ClassValues.add(y.toString()); Attribute C = new Attribute(cname, ClassValues); // Insert new special attribute (which has all possible combinations of labels) D.insertAttributeAt(C, L); D.setClassIndex(L); //Add class values int N = D.numInstances(); for (int i = 0; i < N; i++) { Instance x = D.instance(i); LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L)); String y_string = y.toString(); // add it if (ClassValues.contains(y_string)) //if its class value exists x.setClassValue(y_string); // decomp else if (n > 0) { //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n); LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n); //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations); if (d_subsets.length > 0) { // fast x.setClassValue(d_subsets[0].toString()); // additional if (d_subsets.length > 1) { for (int s_i = 1; s_i < d_subsets.length; s_i++) { Instance x_ = (Instance) (x).copy(); 
x_.setClassValue(d_subsets[s_i].toString()); D.add(x_); } } } else { x.setClassMissing(); } } } // remove with missing class D.deleteWithMissingClass(); try { D = F.removeLabels(D, L); } catch (Exception e) { // should never happen } D.setClassIndex(0); return D; }
From source file:meka.core.StatUtils.java
License:Open Source License
/**
 * jPMF - Joint PMF.
 *
 * <p>Each instance adds 1/N to the cell indexed by its (rounded) values of attributes
 * j and k, where N is the number of instances in D.
 *
 * @param D dataset
 * @param j index of the first attribute
 * @param k index of the second attribute
 * @return the joint PMF of the j-th and k-th labels in D, sized by the number of values
 *         of each of the two attributes
 */
public static double[][] jPMF(Instances D, int j, int k) {
    final int numValuesJ = D.attribute(j).numValues();
    final int numValuesK = D.attribute(k).numValues();
    double[][] joint = new double[numValuesJ][numValuesK];

    final int numRows = D.numInstances();
    final double mass = 1.0 / (double) numRows; // probability mass of a single instance
    for (int row = 0; row < numRows; row++) {
        final int valueJ = (int) Math.round(D.instance(row).value(j));
        final int valueK = (int) Math.round(D.instance(row).value(k));
        joint[valueJ][valueK] += mass;
    }
    return joint;
}
From source file:meka.core.StatUtils.java
License:Open Source License
/** * Joint Distribution./*from w ww . j a va 2s . com*/ * @return the joint PMF of the j-th and k-th and lthlabels in D. */ public static double[][][] jPMF(Instances D, int j, int k, int l) { double JOINT[][][] = new double[D.attribute(j).numValues()][D.attribute(k).numValues()][D.attribute(l) .numValues()]; int N = D.numInstances(); for (int i = 0; i < N; i++) { int v_j = (int) Math.round(D.instance(i).value(j)); int v_k = (int) Math.round(D.instance(i).value(k)); int v_l = (int) Math.round(D.instance(i).value(l)); JOINT[v_j][v_k][v_l] += (1.0 / (double) N); } return JOINT; }
From source file:meka.core.StatUtils.java
License:Open Source License
/** * GetC - Get pairwise co-ocurrence counts from the training data D. * <br>//from ww w. ja v a 2s . c om * NOTE multi-label only * @return C[][] where C[j][k] is the number of times where Y[i][j] = 1 and y[i][k] = 1 over all i = 1,...,N */ public static int[][] getC(Instances D) { int L = D.classIndex(); int N = D.numInstances(); int C[][] = new int[L][L]; for (int i = 0; i < N; i++) { for (int j = 0; j < L; j++) { C[j][j] += (int) D.instance(i).value(j); // C[j==1] ++ for (int k = j + 1; k < L; k++) { C[j][k] += (D.instance(i).value(j) + D.instance(i).value(k) >= 2.0) ? 1 : 0; // C[j==1,k==1] ++ } } } return C; }