List of usage examples for weka.core Instances classIndex
public int classIndex()
From source file:meka.core.StatUtils.java
License:Open Source License
/**
 * CondDepMatrix - Get a Conditional Dependency Matrix.
 * Based on Zhang's 'LEAD' approach: if the per-label errors were conditionally
 * independent, the probability of labels j and k both getting errors on the same
 * instance would be error(j)*error(k); the observed co-occurrence of errors is
 * compared against that expectation with a chi-squared statistic.
 * @param D dataset (labels are the first classIndex() attributes)
 * @param result predictions and evaluation output for D
 * @return an L*L (upper-triangular) matrix of conditional dependence
 */
public static double[][] condDepMatrix(Instances D, Result result) {
    int L = D.classIndex();
    int N = D.numInstances();
    double T[][] = MLUtils.getYfromD(D); // Output (TEACHER)
    double Y[][] = MatrixUtils.threshold(result.allPredictions(), 0.5); // Output (PREDICTED)
    result.output = Result.getStats(result, "6"); // <-- high verbosity, because we need individual accuracies
    double E[] = fillError(result, L); // Errors (EXPECTED), per-label error rates
    double F[][][] = new double[3][L][L]; // Errors (ACTUAL), counts of 3 error types per label pair
    // Find the actual co-occurrence of the three error types ...
    for (int i = 0; i < N; i++) {
        int y[] = A.toIntArray(Y[i], 0.5); // predicted
        int t[] = A.toIntArray(T[i], 0.5); // actual (teacher)
        for (int j = 0; j < L; j++) {
            for (int k = j + 1; k < L; k++) {
                if (y[j] != t[j] && y[k] != t[k]) { // if j incorrect and k also ...
                    F[0][j][k]++; // error type 0: both labels wrong
                } else if (y[j] == t[j] && t[k] == y[k]) { // both are correct
                    F[2][j][k]++; // error type 2: both labels right
                } else { // if only one is correct
                    F[1][j][k]++; // error type 1: exactly one wrong
                }
            }
        }
    }
    // Expected counts of each error type under independence of the per-label errors
    double E_norm[][][] = new double[3][L][L];
    for (int j = 0; j < L; j++) {
        for (int k = j + 1; k < L; k++) {
            E_norm[0][j][k] = N * (E[j] * E[k]);
            E_norm[2][j][k] = N * ((1.0 - E[k]) * (1.0 - E[j]));
            E_norm[1][j][k] = N * ((E[j] * (1.0 - E[k])) + (1.0 - E[j]) * E[k]);
        }
    }
    return StatUtils.chi2(F, E_norm);
}
From source file:meka.core.StatUtils.java
License:Open Source License
/** * LEAD. // w ww. j ava 2 s . c o m * Do the chi-squared LEAD test on all labels in D. * We would expect the 3 kinds of error to be uncorrelacted. * However, if they are significantly correlated, this means that there is conditional dependence! */ public static double[][] LEAD2(Instances D, Result result) { int L = D.classIndex(); int N = D.numInstances(); double Y[][] = MLUtils.getYfromD(D); // Real double Y_[][] = MatrixUtils.threshold(result.allPredictions(), 0.5); // Predicted // Error double E[][] = MatrixUtils.subtract(Y, Y_); // Expected (for each j) double X[][] = new double[L][L]; for (int j = 0; j < L; j++) { for (int k = j + 1; k < L; k++) { for (int v : new int[] { 0, 1, -1 }) { double p_j = p(E, j, v); // prior double p_k = p(E, k, v); // prior double p_jk = P(E, j, v, k, v); // joint double Exp = p_j * p_k; // expected //System.out.println("v = "+v); //System.out.println("p_j "+p_j); //System.out.println("p_k "+p_k); //System.out.println("p_jk"+p_jk); X[j][k] += (((p_jk - Exp) * (p_jk - Exp)) / Exp); // calc. } //System.out.println(""+X[j][k]); X[j][k] -= CRITICAL[1]; } } return X; }
From source file:meka.core.StatUtils.java
License:Open Source License
/** * LEAD - Performs LEAD on dataset 'D', with corresponding gresult 'R', and dependency measurement type 'MDType'. *///from w ww. j a va 2s . co m public static double[][] LEAD(Instances D, Result R, String MDType) { int L = D.classIndex(); int N = D.numInstances(); // Extract true labels from D, predicted labels from R double Ytrue[][] = MLUtils.getYfromD(D); // True double Ypred[][] = MatrixUtils.threshold(R.allPredictions(), 0.5); // Predicted // Make Error matrix double E[][] = MatrixUtils.abs(MatrixUtils.subtract(Ytrue, Ypred)); // Replace labels with errors Instances D_E = MLUtils.replaceZasClasses(new Instances(D), E, L); // Pass through any measure of marginal dependence return StatUtils.margDepMatrix(D_E, MDType); }
From source file:meka.core.StatUtils.java
License:Open Source License
/**
 * Main - do some tests.
 * Usage: &lt;dataset options&gt; ... where args[2] selects the measure:
 * "L" runs LEAD (with optional dependency type in args[3], default "I");
 * any other value is passed straight to margDepMatrix.
 */
public static void main(String args[]) throws Exception {
    Instances D = Evaluation.loadDataset(args);
    MLUtils.prepareData(D);
    int L = D.classIndex();
    double CD[][] = null;
    if (args[2].equals("L")) {
        String I = "I";
        // BUGFIX: reading args[3] requires at least 4 arguments; the original
        // check 'args.length >= 3' threw ArrayIndexOutOfBoundsException when
        // exactly 3 arguments were supplied.
        if (args.length > 3)
            I = args[3];
        CD = StatUtils.LEAD(D, new SMO(), new Random(), I);
    } else {
        CD = StatUtils.margDepMatrix(D, args[2]);
    }
    System.out.println(MatrixUtils.toString(CD, "M" + args[2]));
}
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/** * Get Partition From Dataset Hierarchy - assumes attributes are hierarchically arranged with '.'. * For example europe.spain indicates leafnode spain of branch europe. * @param D Dataset//from ww w. j a v a 2s .c o m * @return partition */ public static final int[][] getPartitionFromDatasetHierarchy(Instances D) { HashMap<String, LabelSet> map = new HashMap<String, LabelSet>(); int L = D.classIndex(); for (int j = 0; j < L; j++) { String s = D.attribute(j).name().split("\\.")[0]; LabelSet Y = map.get(s); if (Y == null) Y = new LabelSet(new int[] { j }); else { Y.indices = A.append(Y.indices, j); Arrays.sort(Y.indices); } map.put(s, Y); } int partition[][] = new int[map.size()][]; int i = 0; for (LabelSet part : map.values()) { //System.out.println(""+i+": "+Arrays.toString(part.indices)); partition[i++] = part.indices; } return partition; }
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/**
 * Make Partition Dataset - out of dataset D, on indices part[].
 * @param D regular multi-label dataset (of L = classIndex() labels)
 * @param part list of indices we want to make into a PS dataset.
 * @param P see {@link PSUtils}
 * @param N see {@link PSUtils}
 * @return Dataset with 1 multi-valued class label, representing the combinations of part[].
 */
public static Instances makePartitionDataset(Instances D, int part[], int P, int N) throws Exception {
    int L = D.classIndex();
    Instances D_ = new Instances(D);
    // strip out irrelevant attributes
    D_.setClassIndex(-1);
    // NOTE(review): keepLabels is called on D, not D_ — the copy made above (and
    // its setClassIndex(-1)) is immediately discarded. Presumably keepLabels
    // returns a new filtered copy; confirm it does not mutate D, and whether D_
    // was the intended argument here.
    D_ = F.keepLabels(D, L, part);
    D_.setClassIndex(part.length);
    // make LC transformation
    D_ = PSUtils.PSTransformation(D_, P, N);
    return D_;
}
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/**
 * Super Label Transformation - transform dataset D into a dataset with K
 * multi-class target attributes, using NSR/PS-style pruning and recomposition
 * according to partition 'indices' and pruning values 'p' and 'n'.
 * @see PSUtils.PSTransformation
 * @param indices m by k: m super variables, each relating to k original variables
 * @param D either multi-label or multi-target dataset
 * @param p pruning value
 * @param n subset replacement value
 * @return a multi-target dataset
 */
public static Instances SLTransformation(Instances D, int indices[][], int p, int n) {
    int L = D.classIndex();
    int K = indices.length;
    ArrayList<String> values[] = new ArrayList[K];
    HashMap<String, Integer> counts[] = new HashMap[K];
    // create D_ as a copy of D
    Instances D_ = new Instances(D);
    // clear D_: remove the original L label attributes from the front
    // F.removeLabels(D_,L);
    for (int j = 0; j < L; j++) {
        D_.deleteAttributeAt(0);
    }
    // create atts: one nominal super-label attribute per partition element,
    // whose values are the label combinations surviving pruning at 'p'
    for (int j = 0; j < K; j++) {
        int att[] = indices[j];
        //int values[] = new int[2]; //getValues(indices,D,p);
        counts[j] = getCounts(D, att, p);
        Set<String> vals = counts[j].keySet(); //getValues(D,att,p);
        values[j] = new ArrayList(vals);
        D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j);
    }
    // copy over values: encode each instance's label subset per super-label
    ArrayList<Integer> deleteList = new ArrayList<Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        for (int j = 0; j < K; j++) {
            String y = encodeValue(x, indices[j]);
            try {
                D_.instance(i).setValue(j, y); // y =
            } catch (Exception e) {
                // value not allowed (was pruned): replace with its top-n subsets
                deleteList.add(i); // mark it for deletion
                String y_close[] = getTopNSubsets(y, counts[j], n); // get N subsets
                for (int m = 0; m < y_close.length; m++) {
                    //System.out.println("add "+y_close[m]+" "+counts[j]);
                    // each subset copy shares the weight equally
                    Instance x_copy = (Instance) D_.instance(i).copy();
                    x_copy.setValue(j, y_close[m]);
                    x_copy.setWeight(1.0 / y_close.length);
                    D_.add(x_copy);
                }
            }
        }
    }
    // clean up: delete marked originals in descending index order so earlier
    // indices remain valid (replacement copies were appended at the end)
    Collections.sort(deleteList, Collections.reverseOrder());
    //System.out.println("Deleting "+deleteList.size()+" defunct instances.");
    for (int i : deleteList) {
        D_.delete(i);
    }
    // set class
    D_.setClassIndex(K);
    // done!
    return D_;
}
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
@Override public Instances determineOutputFormat(Instances D) throws Exception { //System.out.println("DETERMINE OUTPUT FORMAT = "+D.numInstances()); Instances D_out = new Instances(D, 0); int L = D.classIndex(); for (int i = 0; i < L - indices.length; i++) { D_out.deleteAttributeAt(0);/*from w w w. j a v a 2 s .c o m*/ } return D_out; }
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
@Override public Instances process(Instances D) throws Exception { //System.out.println("PROCESS! = "+D.numInstances()); int L = D.classIndex(); D = new Instances(D); // D_ // rename classes for (int j = 0; j < L; j++) { D.renameAttribute(j, encodeClass(j)); }//www .j a va 2 s. co m // merge labels D = mergeLabels(D, indices, m_P, m_N); // templates x_template = D.firstInstance(); setOutputFormat(D); //System.out.println("PROCESS! => "+D); return D; }
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/**
 * Merge Labels - Make a new 'D', with labels made into superlabels, according
 * to partition 'indices', and pruning values 'p' and 'n'.
 * @param D assume attributes in D labeled by original index
 * @return Instances with attributes at j and k moved to position L as (j,k), with classIndex = L-1
 */
public static Instances mergeLabels(Instances D, int indices[][], int p, int n) {
    int L = D.classIndex();
    int K = indices.length;
    ArrayList<String> values[] = new ArrayList[K];
    HashMap<String, Integer> counts[] = new HashMap[K];
    // create D_ as a copy of D
    Instances D_ = new Instances(D);
    // clear D_: remove the original L label attributes from the front
    for (int j = 0; j < L; j++) {
        D_.deleteAttributeAt(0);
    }
    // create atts: one nominal super-label attribute per partition element,
    // whose values are the label combinations surviving pruning at 'p'
    for (int j = 0; j < K; j++) {
        int att[] = indices[j];
        //int values[] = new int[2]; //getValues(indices,D,p);
        counts[j] = getCounts(D, att, p);
        Set<String> vals = counts[j].keySet(); //getValues(D,att,p);
        values[j] = new ArrayList(vals);
        D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j);
    }
    // copy over values: encode each instance's label subset per super-label
    ArrayList<Integer> deleteList = new ArrayList<Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        for (int j = 0; j < K; j++) {
            String y = encodeValue(x, indices[j]);
            try {
                D_.instance(i).setValue(j, y); // y =
            } catch (Exception e) {
                // value not allowed (was pruned): replace with its top-n subsets
                deleteList.add(i); // mark it for deletion
                String y_close[] = NSR.getTopNSubsets(y, counts[j], n); // get N subsets
                for (int m = 0; m < y_close.length; m++) {
                    //System.out.println("add "+y_close[m]+" "+counts[j]);
                    // each subset copy shares the weight equally
                    Instance x_copy = (Instance) D_.instance(i).copy();
                    x_copy.setValue(j, y_close[m]);
                    x_copy.setWeight(1.0 / y_close.length);
                    D_.add(x_copy);
                }
            }
        }
    }
    // clean up: delete marked originals in descending index order so earlier
    // indices remain valid (replacement copies were appended at the end)
    Collections.sort(deleteList, Collections.reverseOrder());
    //System.out.println("Deleting "+deleteList.size()+" defunct instances.");
    for (int i : deleteList) {
        D_.delete(i);
    }
    // set class
    D_.setClassIndex(K);
    // done!
    // NOTE(review): nulling a parameter is a no-op for the caller; D is
    // unaffected outside this method
    D = null;
    return D_;
}