Example usage for weka.core Instances classIndex

Introduction

This page collects example usages of the classIndex() method of weka.core.Instances.

Prototype


public int classIndex()

Source Link

Document

Returns the class attribute's index.

Usage

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * Convenience overload: forwards to the two-argument {@code LCTransformation},
 * supplying the dataset's own class index as the second argument.
 *
 * @param D the dataset to transform
 * @return the result of {@code LCTransformation(D, D.classIndex())}
 */
public static Instances LCTransformation(Instances D) {
    final int classIdx = D.classIndex();
    return LCTransformation(D, classIdx);
}

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * Convenience overload: forwards to the five-argument {@code PSTransformation},
 * supplying the dataset's own class index and the fixed name {@code "Class"}.
 *
 * @param D the dataset to transform
 * @param P forwarded unchanged to the five-argument overload
 * @param N forwarded unchanged to the five-argument overload
 * @return the result of {@code PSTransformation(D, D.classIndex(), "Class", P, N)}
 */
public static Instances PSTransformation(Instances D, int P, int N) {
    final int classIdx = D.classIndex();
    return PSTransformation(D, classIdx, "Class", P, N);
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * GetP - Get a pairwise empirical joint-probability matrix P[][] from dataset D.
 * <br>
 * NOTE multi-label only
 *
 * @param D dataset; {@code D.classIndex()} is used as the number of labels L
 * @return an L*L matrix whose diagonal entry [j][j] is {@code p(D, j, 1)} and whose
 *         upper-triangle entry [j][k] (k &gt; j) is {@code P(D, j, 1, k, 1)};
 *         entries below the diagonal are left at 0
 */
public static double[][] getP(Instances D) {
    int L = D.classIndex();
    // Named 'joint' rather than 'P' so it is not confused with the P(...) helper called below.
    double[][] joint = new double[L][L];
    for (int j = 0; j < L; j++) {
        joint[j][j] = p(D, j, 1);
        for (int k = j + 1; k < L; k++) {
            joint[j][k] = P(D, j, 1, k, 1);
        }
    }
    return joint;
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * GetApproxC - A fast version of getC(D), based on frequent sets.
 * Actually, if we don't prune, this is not even approximate -- it is the real empirical C.
 *
 * @param D dataset; {@code D.classIndex()} is used as the number of labels L
 * @return an L*L count matrix: for every label combination returned by
 *         {@code MLUtils.countCombinationsSparse(D, L)}, its count is added to
 *         C[a][a] for each label a in the combination and to C[a][b] for each
 *         pair where b appears after a in the combination's {@code indices}
 */
public static int[][] getApproxC(Instances D) {
    int L = D.classIndex();
    int C[][] = new int[L][L];
    // @todo, can prune here to make even faster by pruning this.
    HashMap<LabelSet, Integer> map = MLUtils.countCombinationsSparse(D, L);

    for (LabelSet y : map.keySet()) {
        // presumably the number of instances carrying exactly this label combination
        int c = map.get(y);
        for (int j = 0; j < y.indices.length; j++) {
            int j_ = y.indices[j];
            C[j_][j_] += c;
            for (int k = j + 1; k < y.indices.length; k++) {
                int k_ = y.indices[k];
                // lands in the upper triangle provided y.indices is sorted ascending -- TODO confirm
                C[j_][k_] += c;
            }
        }
    }

    return C;
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * GetApproxP - A fast version of getP(D), based on frequent sets.
 * Actually, if we don't prune, this is not even approximate -- it is the real empirical P.
 * <br>
 * Counts are accumulated exactly as in getApproxC(D): every label combination contributes
 * its count to the diagonal entry of each label it contains and to the [a][b] entry of each
 * pair of labels it contains (b after a in the combination's {@code indices}). The counts
 * are then divided by the number of instances and floored at 0.0001.
 *
 * @param D dataset; {@code D.classIndex()} is used as the number of labels L
 * @return an L*L matrix with (floored) relative frequencies on the diagonal and in the
 *         upper triangle; entries below the diagonal are left at 0
 */
public static double[][] getApproxP(Instances D) {
    int N = D.numInstances();
    int L = D.classIndex();
    double P[][] = new double[L][L];
    // @todo, can prune here to make even faster by pruning this.
    HashMap<LabelSet, Integer> map = MLUtils.countCombinationsSparse(D, L);

    for (LabelSet y : map.keySet()) {
        // Weight by how often the combination occurs; counting each distinct
        // combination only once would not give the empirical frequencies.
        int c = map.get(y);
        for (int j = 0; j < y.indices.length; j++) {
            // j is a position inside y.indices; the label itself is y.indices[j].
            int j_ = y.indices[j];
            P[j_][j_] += c; // C[j==1] += c
            for (int k = j + 1; k < y.indices.length; k++) {
                int k_ = y.indices[k];
                P[j_][k_] += c; // C[j==1,k==1] += c
            }
        }
    }

    // @todo use getP(C,N) instead
    for (int j = 0; j < L; j++) {
        // Floor at 0.0001 so that no reported probability is exactly zero.
        P[j][j] = Math.max(P[j][j] / (double) N, 0.0001);
        for (int k = j + 1; k < L; k++) {
            P[j][k] = Math.max(P[j][k] / (double) N, 0.0001);
        }
    }

    return P;
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * GetC - Get pairwise co-ocurrence counts from the training data D.
 * <br>/*  w w w.j  ava 2 s  .com*/
 * NOTE multi-label only
 * @return    C[][] where C[j][k] is the number of times where Y[i][j] = 1 and y[i][k] = 1 over all i = 1,...,N
 */
public static int[][] getC(Instances D) {

    int L = D.classIndex();
    int N = D.numInstances();

    int C[][] = new int[L][L];

    for (int i = 0; i < N; i++) {
        for (int j = 0; j < L; j++) {
            C[j][j] += (int) D.instance(i).value(j); // C[j==1] ++
            for (int k = j + 1; k < L; k++) {
                C[j][k] += (D.instance(i).value(j) + D.instance(i).value(k) >= 2.0) ? 1 : 0; // C[j==1,k==1] ++
            }
        }
    }
    return C;
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * Chi^2 - Do the chi-squared test on all pairs of labels.
 * @see #chi2(Instances, int, int)/*w  w  w  .jav a  2 s  . c  o m*/
 * @param   D   dataset
 * @return   The chi-square statistic matrix X
 */
public static double[][] chi2(Instances D) {
    int L = D.classIndex();
    double X[][] = new double[L][L];
    for (int j = 0; j < L; j++) {
        for (int k = j + 1; k < L; k++) {
            X[j][k] = chi2(D, j, k);
        }
    }
    return X;
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * MargDepMatrix - Get an Unconditional Depndency Matrix.
 * @param   D   dataset/*w w w  .j  ava2s. c o m*/
 * @param   op   how we will measure the dependency
 * @return a L*L matrix representing Unconditional Depndence.
 */
public static double[][] margDepMatrix(Instances D, String op) {

    int L = D.classIndex();
    int N = D.numInstances();

    // Simple Co-occurence counts
    if (op.equals("C")) {
        int C[][] = getApproxC(D);
        double P[][] = getP(C, N);
        return P;
    }
    // Mutual information -- complete / multi-target capable
    if (op.equals("I")) {
        return I(D, L);
    }
    // Mutual information -- binary (multi-label) approximation
    if (op.equals("Ib")) {
        int C[][] = getC(D);
        //System.out.println(""+M.toString(C));
        double P[][] = getP(C, N);
        //System.out.println(""+M.toString(P));
        return I(P);
    }
    // Mutual information -- fast binary (multi-label) approximation
    if (op.equals("Ibf")) {
        int C[][] = getApproxC(D);
        //System.out.println(""+M.toString(C));
        double P[][] = getP(C, N);
        //System.out.println(""+M.toString(P));
        return I(P);
    }
    // Conditional information -- binary (multi-label)
    if (op.equals("H")) {
        int C[][] = getC(D);
        return H(C, N);
    }
    // Conditional information -- fast binary (multi-label) approximation
    if (op.equals("H")) {
        int C[][] = getApproxC(D);
        return H(C, N);
    }
    // Chi-squared
    if (op.equals("X")) {
        return chi2(D);
    }
    // Frequencies (cheap)
    if (op.equals("F")) {
        double F[][] = F(D);
        //System.out.println(""+M.toString(F));
        return F;
    }
    /*
    if (op == "C") {
       return getC(D);
    }
    */
    System.err.println("No operation found; Using empty!");

    return new double[L][L];
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * H - Get a Conditional Entropy Matrix.
 *
 * @param D dataset from which the pairwise counts are taken via {@code getC(D)}
 * @return the result of {@code H(C, N)} for those counts, with N = {@code D.numInstances()}
 */
public static double[][] H(Instances D) {
    int C[][] = getC(D);
    // Pass the number of instances, as margDepMatrix does when it calls H(C, N);
    // the class index (number of labels) was being passed here instead.
    return H(C, D.numInstances());
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * F - Relative frequency matrix (between p(j),p(k) and p(j,k)) in dataset D.
 *
 * @param D dataset; {@code D.classIndex()} is used as the matrix dimension
 * @return a square matrix whose entry [j][k], for k &gt; j, is {@code |1 - f(D, j, k)|};
 *         the diagonal and the entries below it are left at 0
 */
public static double[][] F(Instances D) {
    final int numLabels = D.classIndex();
    final double[][] freq = new double[numLabels][numLabels];
    for (int row = 0; row < numLabels; row++) {
        for (int col = row + 1; col < numLabels; col++) {
            final double ratio = f(D, row, col);
            freq[row][col] = Math.abs(1. - ratio);
        }
    }
    return freq;
}