Example usage for weka.core Instances classIndex

List of usage examples for weka.core Instances classIndex

Introduction

In this page you can find the example usage for weka.core Instances classIndex.

Prototype


public int classIndex() 

Source Link

Document

Returns the class attribute's index.

Usage

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * CondDepMatrix - Get a Conditional Dependency Matrix.
 * Based on Zhang's 'LEAD' approach: under conditional independence, the probability of
 * labels j and k both being predicted incorrectly on the same instance should be
 * error(j)*error(k); comparing the observed error co-occurrence counts against this
 * expectation (via a chi-squared test) reveals conditional dependence.
 * @param   D       multi-label dataset (meka convention: classIndex() == number of labels L)
 * @param   result  predictions already made on D (allPredictions() aligned with D's rows)
 * @return a L*L matrix of conditional dependence (chi-squared statistics).
 */
public static double[][] condDepMatrix(Instances D, Result result) {

    int L = D.classIndex();
    int N = D.numInstances();
    double T[][] = MLUtils.getYfromD(D); // Output (TEACHER): true label matrix, N*L
    double Y[][] = MatrixUtils.threshold(result.allPredictions(), 0.5); // Output (PREDICTED): thresholded confidences, N*L
    result.output = Result.getStats(result, "6"); // <-- high verbosity, because we need individual accuracies            
    double E[] = fillError(result, L); // Errors (EXPECTED): per-label error rates
    double F[][][] = new double[3][L][L]; // Errors (ACTUAL): counts per error type (0: both wrong, 2: both right, 1: exactly one right)
    // Find the actual co-occurrence counts for each label pair (j,k), j < k ...
    for (int i = 0; i < N; i++) {
        int y[] = A.toIntArray(Y[i], 0.5); // predicted
        int t[] = A.toIntArray(T[i], 0.5); // actual (teacher)
        for (int j = 0; j < L; j++) {
            for (int k = j + 1; k < L; k++) {
                if (y[j] != t[j] && y[k] != t[k]) {
                    // if j incorrect and k also ...
                    F[0][j][k]++; // error type 0
                } else if (y[j] == t[j] && t[k] == y[k]) {
                    // both are correct
                    F[2][j][k]++; // error type 2
                } else {
                    // if only one is correct
                    F[1][j][k]++; // error type 1
                }
            }
        }
    }

    // Expected counts for each error type under independence of the per-label errors
    // (E[j]*E[k]: both wrong; (1-E[j])*(1-E[k]): both right; mixed terms: exactly one right)
    double E_norm[][][] = new double[3][L][L];
    for (int j = 0; j < L; j++) {
        for (int k = j + 1; k < L; k++) {
            E_norm[0][j][k] = N * (E[j] * E[k]);
            E_norm[2][j][k] = N * ((1.0 - E[k]) * (1.0 - E[j]));
            E_norm[1][j][k] = N * ((E[j] * (1.0 - E[k])) + (1.0 - E[j]) * E[k]);
        }
    }
    return StatUtils.chi2(F, E_norm);
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * LEAD2 - Do the chi-squared LEAD test on all labels in D.
 * We would expect the 3 kinds of error value (0, +1, -1) to be uncorrelated between labels.
 * However, if they are significantly correlated, this means that there is conditional dependence!
 * @param   D       multi-label dataset (meka convention: classIndex() == number of labels L)
 * @param   result  predictions already made on D
 * @return an upper-triangular L*L matrix; entry [j][k] is the chi-squared statistic minus
 *         the critical value, so positive entries indicate significant dependence.
 */
public static double[][] LEAD2(Instances D, Result result) {

    int L = D.classIndex();
    double Y[][] = MLUtils.getYfromD(D); // Real
    double Y_[][] = MatrixUtils.threshold(result.allPredictions(), 0.5); // Predicted
    // Error matrix; each entry is in {0, +1, -1}
    double E[][] = MatrixUtils.subtract(Y, Y_);
    // Statistic for each label pair (j,k), j < k
    double X[][] = new double[L][L];

    for (int j = 0; j < L; j++) {
        for (int k = j + 1; k < L; k++) {
            // accumulate the chi-squared contribution of each possible error value
            for (int v : new int[] { 0, 1, -1 }) {
                double p_j = p(E, j, v); // prior P(e_j = v)
                double p_k = p(E, k, v); // prior P(e_k = v)
                double p_jk = P(E, j, v, k, v); // joint P(e_j = v, e_k = v)
                double Exp = p_j * p_k; // expected joint under independence
                X[j][k] += (((p_jk - Exp) * (p_jk - Exp)) / Exp); // calc.
            }
            // shift by the critical value so that X[j][k] > 0 <=> significant
            X[j][k] -= CRITICAL[1];
        }
    }
    return X;
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * LEAD - Performs LEAD on dataset 'D', with corresponding result 'R', and dependency measurement type 'MDType'.
 * The prediction errors (w.r.t. the true labels in D) replace the labels in a copy of D;
 * measuring marginal dependence among these errors measures conditional dependence
 * among the original labels.
 * @param   D       multi-label dataset (meka convention: classIndex() == number of labels L)
 * @param   R       predictions already made on D
 * @param   MDType  which marginal dependence measure to apply to the error dataset
 * @return  a L*L dependence matrix
 */
public static double[][] LEAD(Instances D, Result R, String MDType) {

    int L = D.classIndex();

    // Extract true labels from D, predicted labels from R
    double Ytrue[][] = MLUtils.getYfromD(D); // True
    double Ypred[][] = MatrixUtils.threshold(R.allPredictions(), 0.5); // Predicted

    // Make Error matrix (1 where prediction and truth differ, else 0)
    double E[][] = MatrixUtils.abs(MatrixUtils.subtract(Ytrue, Ypred));

    // Replace labels with errors
    Instances D_E = MLUtils.replaceZasClasses(new Instances(D), E, L);

    // Pass through any measure of marginal dependence
    return StatUtils.margDepMatrix(D_E, MDType);
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * Main - do some tests.
 * Expects dataset options in args, the measurement type in args[2] ("L" for the LEAD
 * test, otherwise a marginal-dependence type), and optionally a dependency measure
 * in args[3] when "L" is chosen.
 */
public static void main(String args[]) throws Exception {
    Instances D = Evaluation.loadDataset(args);
    MLUtils.prepareData(D);

    double CD[][] = null;

    if (args[2].equals("L")) {
        String I = "I";
        // BUGFIX: reading args[3] requires at least 4 arguments; the previous check
        // (args.length >= 3) threw ArrayIndexOutOfBoundsException for exactly 3 args
        if (args.length > 3)
            I = args[3];
        CD = StatUtils.LEAD(D, new SMO(), new Random(), I);
    } else {
        CD = StatUtils.margDepMatrix(D, args[2]);
    }
    System.out.println(MatrixUtils.toString(CD, "M" + args[2]));
}

From source file:meka.core.SuperLabelUtils.java

License:Open Source License

/**
 * Get Partition From Dataset Hierarchy - assumes attributes are hierarchically arranged with '.'.
 * For example europe.spain indicates leafnode spain of branch europe.
 * @param   D   Dataset
 * @return  partition: one row of label indices per top-level branch
 */
public static final int[][] getPartitionFromDatasetHierarchy(Instances D) {
    HashMap<String, LabelSet> branches = new HashMap<String, LabelSet>();
    int L = D.classIndex();
    for (int j = 0; j < L; j++) {
        // branch name = the attribute-name prefix before the first '.'
        String branch = D.attribute(j).name().split("\\.")[0];
        LabelSet labels = branches.get(branch);
        if (labels == null) {
            // first label seen for this branch
            branches.put(branch, new LabelSet(new int[] { j }));
        } else {
            // extend the existing branch, keeping indices sorted
            labels.indices = A.append(labels.indices, j);
            Arrays.sort(labels.indices);
        }
    }
    int partition[][] = new int[branches.size()][];
    int row = 0;
    for (LabelSet part : branches.values()) {
        partition[row] = part.indices;
        row++;
    }
    return partition;
}

From source file:meka.core.SuperLabelUtils.java

License:Open Source License

/**
 * Make Partition Dataset - out of dataset D, on indices part[].
 * @param   D      regular multi-label dataset (of L = classIndex() labels)
 * @param   part   list of indices we want to make into a PS dataset.
 * @param   P      see {@link PSUtils}/*from  w ww.  j av  a2  s.c  om*/
 * @param   N      see {@link PSUtils}
 * @return Dataset with 1 multi-valued class label, representing the combinations of part[].
 */
public static Instances makePartitionDataset(Instances D, int part[], int P, int N) throws Exception {
    int L = D.classIndex();
    Instances D_ = new Instances(D);
    // strip out irrelevant attributes
    D_.setClassIndex(-1);
    D_ = F.keepLabels(D, L, part);
    D_.setClassIndex(part.length);
    // make LC transformation
    D_ = PSUtils.PSTransformation(D_, P, N);
    return D_;
}

From source file:meka.core.SuperLabelUtils.java

License:Open Source License

/**
 * Super Label Transformation - transform dataset D into a dataset with <code>k</code> multi-class target attributes.
 * Use the NSR/PS-style pruning and recomposition, according to partition 'indices', and pruning values 'p' and 'n'.
 * @see PSUtils.PSTransformation
 * @param indices   m by k: m super variables, each relating to k original variables
 * @param    D   either multi-label or multi-target dataset
 * @param    p   pruning value
 * @param    n   subset replacement value
 * @return       a multi-target dataset
 */
public static Instances SLTransformation(Instances D, int indices[][], int p, int n) {

    int L = D.classIndex();
    int K = indices.length;
    HashMap<String, Integer> counts[] = new HashMap[K];

    // create D_
    Instances D_ = new Instances(D);

    // clear D_: remove the original L label attributes
    for (int j = 0; j < L; j++) {
        D_.deleteAttributeAt(0);
    }

    // create the K super-label attributes; each attribute's values are the (pruned)
    // combinations observed for its group of original labels
    for (int j = 0; j < K; j++) {
        int att[] = indices[j];
        counts[j] = getCounts(D, att, p);
        Set<String> vals = counts[j].keySet();
        D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList<String>(vals)), j);
    }

    // copy over values
    ArrayList<Integer> deleteList = new ArrayList<Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        for (int j = 0; j < K; j++) {
            String y = encodeValue(x, indices[j]);
            try {
                D_.instance(i).setValue(j, y);
            } catch (Exception e) {
                // value not allowed (pruned away):
                // BUGFIX: guard against duplicate indices -- if several super-labels of the
                // same instance fail, deleting the same index twice below would remove the
                // wrong instance once indices have shifted
                if (!deleteList.contains(i))
                    deleteList.add(i); // mark it for deletion
                // replace it with its top n allowed subsets, weighted evenly
                String y_close[] = getTopNSubsets(y, counts[j], n); // get N subsets
                for (int m = 0; m < y_close.length; m++) {
                    Instance x_copy = (Instance) D_.instance(i).copy();
                    x_copy.setValue(j, y_close[m]);
                    x_copy.setWeight(1.0 / y_close.length);
                    D_.add(x_copy);
                }
            }
        }
    }
    // clean up: delete in descending index order so earlier deletions don't shift later ones
    Collections.sort(deleteList, Collections.reverseOrder());
    for (int i : deleteList) {
        D_.delete(i);
    }
    // set class
    D_.setClassIndex(K);
    // done!
    return D_;
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

@Override
public Instances determineOutputFormat(Instances D) throws Exception {
    // Header-only copy of D, with the leading label attributes removed:
    // the L original labels collapse into indices.length super-labels.
    Instances header = new Instances(D, 0);
    int toRemove = D.classIndex() - indices.length;
    for (int i = 0; i < toRemove; i++) {
        header.deleteAttributeAt(0);
    }
    return header;
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

@Override
public Instances process(Instances D) throws Exception {

    int L = D.classIndex();

    // work on a copy of the input
    Instances D_ = new Instances(D);

    // rename the label attributes to their encoded class names
    for (int j = 0; j < L; j++) {
        D_.renameAttribute(j, encodeClass(j));
    }

    // merge labels into super-labels according to the partition
    D_ = mergeLabels(D_, indices, m_P, m_N);

    // keep a template instance and declare the output format
    x_template = D_.firstInstance();
    setOutputFormat(D_);

    return D_;
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Merge Labels - Make a new 'D', with labels made into superlabels, according to partition 'indices', and pruning values 'p' and 'n'.
 * @param    D   assume attributes in D labeled by original index
 * @return       Instances with attributes at j and k moved to position L as (j,k), with classIndex = L-1
 *//* ww  w  . j  a  va2  s. co m*/
public static Instances mergeLabels(Instances D, int indices[][], int p, int n) {

    int L = D.classIndex();
    int K = indices.length;
    ArrayList<String> values[] = new ArrayList[K];
    HashMap<String, Integer> counts[] = new HashMap[K];

    // create D_
    Instances D_ = new Instances(D);

    // clear D_
    for (int j = 0; j < L; j++) {
        D_.deleteAttributeAt(0);
    }

    // create atts
    for (int j = 0; j < K; j++) {
        int att[] = indices[j];
        //int values[] = new int[2]; //getValues(indices,D,p);
        counts[j] = getCounts(D, att, p);
        Set<String> vals = counts[j].keySet(); //getValues(D,att,p);
        values[j] = new ArrayList(vals);
        D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j);
    }

    // copy over values
    ArrayList<Integer> deleteList = new ArrayList<Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        for (int j = 0; j < K; j++) {
            String y = encodeValue(x, indices[j]);
            try {
                D_.instance(i).setValue(j, y); // y = 
            } catch (Exception e) {
                // value not allowed
                deleteList.add(i); // mark it for deletion
                String y_close[] = NSR.getTopNSubsets(y, counts[j], n); // get N subsets
                for (int m = 0; m < y_close.length; m++) {
                    //System.out.println("add "+y_close[m]+" "+counts[j]);
                    Instance x_copy = (Instance) D_.instance(i).copy();
                    x_copy.setValue(j, y_close[m]);
                    x_copy.setWeight(1.0 / y_close.length);
                    D_.add(x_copy);
                }
            }
        }
    }
    // clean up
    Collections.sort(deleteList, Collections.reverseOrder());
    //System.out.println("Deleting "+deleteList.size()+" defunct instances.");
    for (int i : deleteList) {
        D_.delete(i);
    }
    // set class
    D_.setClassIndex(K);
    // done!
    D = null;
    return D_;
}