Example usage for weka.core Instances instance

List of usage examples for weka.core Instances instance

Introduction

In this page you can find the example usage for weka.core Instances instance.

Prototype



public Instance instance(int index) 

Source Link

Document

Returns the instance at the given position.

Usage

From source file:meka.core.MLUtils.java

License:Open Source License

/**
 * getYfromD - Extract the label values of every instance in D as a dense double matrix.
 * The first {@code D.classIndex()} attributes of each instance are read as the labels.
 * TODO: getYfromInstances would be a better name.
 *
 * @param   D   dataset; its class index is used as the number of label columns
 * @return  an array with one row per instance and one column per label, where entry
 *          {@code [i][k]} is the value of attribute {@code k} of instance {@code i}
 */
public static double[][] getYfromD(Instances D) {
    final int numLabels = D.classIndex();
    final int numRows = D.numInstances();
    final double[][] labels = new double[numRows][numLabels];
    for (int row = 0; row < numRows; row++) {
        final double[] target = labels[row];
        for (int col = 0; col < numLabels; col++) {
            target[col] = D.instance(row).value(col);
        }
    }
    return labels;
}

From source file:meka.core.MLUtils.java

License:Open Source License

/**
 * InsertZintoD - Insert data Z[][] to Instances D (e.g., as labels).
 * NOTE: Assumes binary labels!/*from w  w w.j  a va2 s . c  o m*/
 * @see #addZtoD(Instances, double[][], int)
 */
private static Instances insertZintoD(Instances D, double Z[][]) {

    int L = Z[0].length;

    // add attributes
    for (int j = 0; j < L; j++) {
        D.insertAttributeAt(new Attribute("c" + j, Arrays.asList(new String[] { "0", "1" })), j);
    }

    // add values Z[0]...Z[N] to D
    // (note that if D.numInstances() < Z.length, only some are added)
    for (int j = 0; j < L; j++) {
        for (int i = 0; i < D.numInstances(); i++) {
            D.instance(i).setValue(j, Z[i][j] > 0.5 ? 1.0 : 0.0);
        }
    }

    D.setClassIndex(L);
    return D;
}

From source file:meka.core.MLUtils.java

License:Open Source License

/**
 * AddZtoD - Insert the columns of Z[N][H] (N rows of H columns) into Instances D as new
 * attributes named "A0", "A1", ..., placed at attribute positions L, L+1, ..., L+H-1.
 * D should also have N rows. D is modified in place and its class index is set to L.
 *
 * @param   D   dataset (of N instances), modified in place
 * @param   Z   attribute space (of N rows, H columns)
 * @param   L   attribute index in D at which the first column of Z is inserted
 * @return  the same (modified) dataset D
 */
private static Instances addZtoD(Instances D, double Z[][], int L) {

    final int numColumns = Z[0].length;
    final int numRows = D.numInstances();

    // first create all the new attributes ...
    for (int col = 0; col < numColumns; col++) {
        final int position = L + col;
        D.insertAttributeAt(new Attribute("A" + col), position);
    }

    // ... then fill them in, one column at a time
    for (int col = 0; col < numColumns; col++) {
        final int position = L + col;
        for (int row = 0; row < numRows; row++) {
            final double cell = Z[row][col];
            D.instance(row).setValue(position, cell);
        }
    }

    D.setClassIndex(L);
    return D;
}

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * CountCombinationsSparseSubset - like CountCombinationsSparse, but restricted to the
 * attribute positions listed in 'indices[]' of each instance in 'D'.
 *
 * @param   D         dataset
 * @param   indices   indices we are interested in
 * @return  a HashMap where a LabelSet representation of each label combination is associated
 *          with an Integer count, e.g., [3,7,14],3
 */
public static HashMap<LabelSet, Integer> countCombinationsSparseSubset(Instances D, int indices[]) {
    final HashMap<LabelSet, Integer> counts = new HashMap<LabelSet, Integer>();
    final int numInstances = D.numInstances();

    for (int row = 0; row < numInstances; row++) {
        final LabelSet key = new LabelSet(MLUtils.toSubIndicesSet(D.instance(row), indices));
        final Integer seen = counts.get(key);
        if (seen == null) {
            // first occurrence of this combination
            counts.put(key, 1);
        } else {
            counts.put(key, seen + 1);
        }
    }
    return counts;
}

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * CountCombinationsSparse - return a mapping of each distinct label combination and its count.
 * @param   D   dataset /*from w  w w .j av  a2  s.c  om*/
 * @param   L   number of labels
 * @return   a HashMap where a LabelSet representation of each label combination is associated with an Integer count, e.g., [3,7,14],3
 */
public static final HashMap<LabelSet, Integer> countCombinationsSparse(Instances D, int L) {
    HashMap<LabelSet, Integer> map = new HashMap<LabelSet, Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        LabelSet y = new LabelSet(MLUtils.toSparseIntArray(D.instance(i), L));
        Integer c = map.get(y);
        map.put(y, c == null ? 1 : c + 1);
    }
    return map;
}

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * Transform instances into a multi-class representation.
 * @param D         original dataset/*w ww  . jav  a 2  s .  co m*/
 * @param L         number of labels in the original dataset
 * @param cname      class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods)
 * @param p         pruning value
 * @param n         restoration value
 * @return transformed dataset
 */
public static Instances PSTransformation(Instances D, int L, String cname, int p, int n) {
    D = new Instances(D);

    // Gather combinations
    HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L);

    // Prune combinations
    if (p > 0)
        MLUtils.pruneCountHashMap(distinctCombinations, p);

    // Check there are > 2
    if (distinctCombinations.size() <= 1 && p > 0) {
        // ... or try again if not ...
        System.err.println("[Warning] You did too much pruning, setting P = P-1");
        return PSTransformation(D, L, cname, p - 1, n);
    }

    // Create class attribute
    ArrayList<String> ClassValues = new ArrayList<String>();
    for (LabelSet y : distinctCombinations.keySet())
        ClassValues.add(y.toString());
    Attribute C = new Attribute(cname, ClassValues);

    // Insert new special attribute (which has all possible combinations of labels) 
    D.insertAttributeAt(C, L);
    D.setClassIndex(L);

    //Add class values
    int N = D.numInstances();
    for (int i = 0; i < N; i++) {
        Instance x = D.instance(i);
        LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L));
        String y_string = y.toString();

        // add it
        if (ClassValues.contains(y_string)) //if its class value exists
            x.setClassValue(y_string);
        // decomp
        else if (n > 0) {
            //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n);
            LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n);
            //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations);
            if (d_subsets.length > 0) {
                // fast
                x.setClassValue(d_subsets[0].toString());
                // additional
                if (d_subsets.length > 1) {
                    for (int s_i = 1; s_i < d_subsets.length; s_i++) {
                        Instance x_ = (Instance) (x).copy();
                        x_.setClassValue(d_subsets[s_i].toString());
                        D.add(x_);
                    }
                }
            } else {
                x.setClassMissing();
            }
        }
    }

    // remove with missing class
    D.deleteWithMissingClass();

    try {
        D = F.removeLabels(D, L);
    } catch (Exception e) {
        // should never happen
    }
    D.setClassIndex(0);

    return D;
}

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * Transform instances into a multi-class representation.
 * <p>
 * This method performs exactly the same steps as
 * {@link #PSTransformation(Instances, int, String, int, int)} (and already fell back to it when
 * too much pruning was requested), so it simply delegates to that method rather than carrying
 * a second copy of the same code.
 *
 * @param D         original dataset
 * @param L         number of labels in that dataset
 * @param cname     class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods)
 * @param p         pruning value
 * @param n         restoration value
 * @return transformed dataset
 */
public static Instances SLTransformation(Instances D, int L, String cname, int p, int n) {
    return PSTransformation(D, L, cname, p, n);
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * jPMF - Joint PMF.
 * Each instance adds 1/N to the cell selected by its (rounded) values of attributes j and k,
 * where N is the number of instances in D.
 *
 * @param   D   dataset
 * @param   j   index of the first attribute
 * @param   k   index of the second attribute
 * @return the joint PMF of the j-th and k-th labels in D.
 */
public static double[][] jPMF(Instances D, int j, int k) {
    final int rowsJ = D.attribute(j).numValues();
    final int colsK = D.attribute(k).numValues();
    final double[][] joint = new double[rowsJ][colsK];

    final int numInstances = D.numInstances();
    // probability mass contributed by a single instance
    final double mass = 1.0 / (double) numInstances;

    for (int row = 0; row < numInstances; row++) {
        final int valueJ = (int) Math.round(D.instance(row).value(j));
        final int valueK = (int) Math.round(D.instance(row).value(k));
        joint[valueJ][valueK] += mass;
    }
    return joint;
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * Joint Distribution./*from w ww  . j a  va 2s  .  com*/
 * @return the joint PMF of the j-th and k-th and lthlabels in D.
 */
public static double[][][] jPMF(Instances D, int j, int k, int l) {
    double JOINT[][][] = new double[D.attribute(j).numValues()][D.attribute(k).numValues()][D.attribute(l)
            .numValues()];
    int N = D.numInstances();
    for (int i = 0; i < N; i++) {
        int v_j = (int) Math.round(D.instance(i).value(j));
        int v_k = (int) Math.round(D.instance(i).value(k));
        int v_l = (int) Math.round(D.instance(i).value(l));
        JOINT[v_j][v_k][v_l] += (1.0 / (double) N);
    }
    return JOINT;
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * GetC - Get pairwise co-occurrence counts from the training data D.
 * <br>
 * NOTE multi-label only.
 * Only the diagonal and the upper triangle (k &gt; j) of the result are filled in; the
 * lower triangle is left at zero.
 *
 * @param   D   dataset; its class index is used as the number of labels L
 * @return  C[][] where C[j][k] is the number of times where Y[i][j] = 1 and Y[i][k] = 1 over all i = 1,...,N
 */
public static int[][] getC(Instances D) {

    final int numLabels = D.classIndex();
    final int numInstances = D.numInstances();

    final int[][] counts = new int[numLabels][numLabels];

    for (int row = 0; row < numInstances; row++) {
        for (int a = 0; a < numLabels; a++) {
            final double valueA = D.instance(row).value(a);
            // diagonal: how often label a is set on its own
            counts[a][a] += (int) valueA;
            for (int b = a + 1; b < numLabels; b++) {
                // both labels set <=> their values add up to at least 2
                final double pairSum = valueA + D.instance(row).value(b);
                if (pairSum >= 2.0) {
                    counts[a][b] += 1;
                }
            }
        }
    }
    return counts;
}