Example usage for weka.core Instances classIndex

Introduction

On this page you can find example usages of the weka.core Instances classIndex method.

Prototype


public int classIndex()

Document

Returns the class attribute's index, or a negative number if it is undefined.
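
As the MEKA examples below show, multi-label code puts the class index to a second use: it stores the number of labels L, and the labels occupy the first L attribute positions. Here is a minimal sketch of setting the index and reading it back; the class name and dataset file are hypothetical:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassIndexDemo {
    public static void main(String[] args) throws Exception {
        // "music.arff" is a placeholder dataset file name.
        Instances D = DataSource.read("music.arff");
        D.setClassIndex(3);     // MEKA convention: the first 3 attributes are the labels
        int L = D.classIndex(); // reads the index back; negative if never set
        System.out.println("Number of labels: L = " + L);
    }
}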

Usage

From source file:meka.classifiers.multilabel.BRq.java

License:Open Source License

@Override
public void buildClassifier(Instances data) throws Exception {
    testCapabilities(data);

    int c = data.classIndex();

    if (getDebug())
        System.out.print("-: Creating " + c + " models (" + m_Classifier.getClass().getName() + "): ");
    m_MultiClassifiers = AbstractClassifier.makeCopies(m_Classifier, c);

    Instances sub_data = null;

    for (int i = 0; i < c; i++) {

        int indices[][] = new int[c][c - 1];
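        // fill indices[i] with every label index except i -- these are the labels to remove below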
        for (int j = 0, k = 0; j < c; j++) {
            if (j != i) {
                indices[i][k++] = j;
            }
        }

        //Select only class attribute 'i'
        Remove FilterRemove = new Remove();
        FilterRemove.setAttributeIndicesArray(indices[i]);
        FilterRemove.setInputFormat(data);
        FilterRemove.setInvertSelection(true);
        sub_data = Filter.useFilter(data, FilterRemove);
        sub_data.setClassIndex(0);
        /* BEGIN downsample for this link */
        sub_data.randomize(m_Random);
        int numToRemove = sub_data.numInstances()
                - (int) Math.round(sub_data.numInstances() * m_DownSampleRatio);
        for (int m = 0, removed = 0; m < sub_data.numInstances(); m++) {
            if (sub_data.instance(m).classValue() <= 0.0) {
                sub_data.instance(m).setClassMissing();
                if (++removed >= numToRemove)
                    break;
            }
        }
        sub_data.deleteWithMissingClass();
        /* END downsample for this link */

        //Build the classifier for that class
        m_MultiClassifiers[i].buildClassifier(sub_data);
        if (getDebug())
            System.out.print(" " + (i + 1));

    }

    if (getDebug())
        System.out.println(" :-");

    m_InstancesTemplate = new Instances(sub_data, 0);

}

From source file:meka.classifiers.multilabel.cc.CNode.java

License:Open Source License

/**
 * Transform - transform dataset D for this node.
 * this.j defines the current node index,  e.g., 3
 * this.paY[] defines the parent indices,  e.g., [1,4]
 * all other labels are removed,           e.g., [0,2,5,...,L-1]
 * @return the dataset D with every label attribute removed EXCEPT the current node and its parents.
 */
public Instances transform(Instances D) throws Exception {
    int L = D.classIndex();
    d = D.numAttributes() - L;
    int keep[] = A.append(this.paY, j); // keep all parents and self!
    Arrays.sort(keep);
    int remv[] = A.invert(keep, L); // i.e., remove the rest < L
    Arrays.sort(remv);
    map = new int[L];
    for (int j = 0; j < L; j++) {
        map[j] = Arrays.binarySearch(keep, j);
    }
    Instances D_ = F.remove(new Instances(D), remv, false);
    D_.setClassIndex(map[this.j]);
    return D_;
}
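
To make the index bookkeeping concrete (a worked example, not taken from the source): with L = 6, this.j = 3 and this.paY = [1,4], we get keep = [1,3,4] and remv = [0,2,5]. Arrays.binarySearch over keep then yields map = [-1, 0, -2, 1, 2, -4] (negative values are insertion points for the removed labels), so the transformed dataset D_ retains labels 1, 3 and 4 and its class index is set to map[3] = 1.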

From source file:meka.classifiers.multilabel.cc.CNode.java

License:Open Source License

/**
 * Transform.
 * @param   D      original Instances
 * @param   c      to be the class Attribute
 * @param   pa_c   the parent indices of c
 * @return   new Instances T
 */
public static Instances transform(Instances D, int c, int pa_c[]) throws Exception {
    int L = D.classIndex();
    int keep[] = A.append(pa_c, c); // keep all parents and self!
    Arrays.sort(keep);
    int remv[] = A.invert(keep, L); // i.e., remove the rest < L
    Arrays.sort(remv);
    Instances T = F.remove(new Instances(D), remv, false);
    int map[] = new int[L];
    for (int j = 0; j < L; j++) {
        map[j] = Arrays.binarySearch(keep, j);
    }
    T.setClassIndex(map[c]);
    return T;
}

From source file:meka.classifiers.multilabel.CCq.java

License:Open Source License

@Override
public void buildClassifier(Instances Train) throws Exception {
    testCapabilities(Train);

    this.m_NumClasses = Train.classIndex();

    int indices[] = MLUtils.gen_indices(m_NumClasses);
    MLUtils.randomize(indices, new Random(m_S));
    if (getDebug())
        System.out.print(":- Chain (");
    root = new QLink(indices, 0, Train);
    if (getDebug())
        System.out.println(" ) -:");
}

From source file:meka.classifiers.multilabel.CDN.java

License:Open Source License

@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    int N = D.numInstances();
    int L = D.classIndex();
    h = new Classifier[L];
    m_R = new Random(m_S);
    D_templates = new Instances[L];

    // Build L probabilistic models, each to predict Y_j | X, Y_{-j}; save the templates.
    for (int j = 0; j < L; j++) {
        // X = [Y[0],...,Y[j-1],Y[j+1],...,Y[L-1],X]
        D_templates[j] = new Instances(D);
        D_templates[j].setClassIndex(j);
        // train H[j] : X -> Y
        h[j] = AbstractClassifier.forName(getClassifier().getClass().getName(),
                ((AbstractClassifier) getClassifier()).getOptions());
        h[j].buildClassifier(D_templates[j]);
    }
}

From source file:meka.classifiers.multilabel.CDT.java

License:Open Source License

@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    int L = D.classIndex();
    int d = D.numAttributes() - L;
    m_R = new Random(getSeed());
    int width = m_Width;

    if (m_Width < 0)
        width = (int) Math.sqrt(L);
    else if (m_Width == 0) {
        width = L;
    }

    nodes = new CNode[L];
    /*
     * Make the Trellis.
     */
    if (getDebug())
        System.out.println("Make Trellis of width " + m_Width);
    int indices[] = A.make_sequence(L);
    A.shuffle(indices, new Random(getSeed()));
    trel = new Trellis(indices, width, m_Density);
    if (getDebug())
        System.out.println("==>\n" + trel.toString());

    /* Rearrange the Trellis */
    if (!m_DependencyMetric.equals("None"))
        trel = CT.orderTrellis(trel, StatUtils.margDepMatrix(D, m_DependencyMetric), m_R);

    /*
     * Build Trellis
     */
    if (getDebug())
        System.out.println("Build Trellis");

    if (getDebug())
        System.out.println("nodes: " + Arrays.toString(trel.indices));

    for (int j = 0; j < L; j++) {
        int jv = trel.indices[j];
        if (getDebug()) {
            System.out.println("Build Node h_[" + jv + "] : P(y_" + jv + " | x_[1:d], y_"
                    + Arrays.toString(trel.getNeighbours(j)) + ")");
        }
        nodes[jv] = new CNode(jv, null, trel.getNeighbours(j));
        nodes[jv].build(D, m_Classifier);
    }

}

From source file:meka.classifiers.multilabel.DBPNN.java

License:Open Source License

@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    // Extract variables

    int L = D.classIndex();
    int d = D.numAttributes() - L;
    double X_[][] = MLUtils.getXfromD(D);
    double Y_[][] = MLUtils.getYfromD(D);

    // Build an RBM
    if (getDebug())
        System.out.println("Build RBM(s) ... ");

    String ops[] = this.getOptions();
    dbm = new DBM(ops);
    dbm.setE(m_E);
    ((DBM) dbm).setH(m_H, m_N);

    long before = System.currentTimeMillis();
    dbm.train(X_, m_H); // batch train
    rbm_time = System.currentTimeMillis() - before;

    if (getDebug()) {
        Matrix tW[] = dbm.getWs();
        System.out.println("X = \n" + MatrixUtils.toString(X_));
        for (int l = 0; l < tW.length; l++) {
            System.out.println("W = \n" + MatrixUtils.toString(tW[l].getArray()));
        }
        System.out.println("Y = \n" + MatrixUtils.toString(Y_));
    }

    /* Trim W's: instead of (d+1 x h+1), they become (d+1, h)
    wwb      ww                                                     
    wwb      ww                                                     
    wwb  ->  ww                                                     
    wwb      ww                                                     
    bbb                                                             
         (this is because RBMs go both ways -- have biases both ways -- whereas BP only goes up)
         TODO the best thing would be to keep different views of the same array ...
      */

    Matrix W[] = trimBiases(dbm.getWs());

    // Back propagate with batch size of 1 to fine tune the DBM into a supervised DBN
    if (m_Classifier instanceof BPNN) {
        if (getDebug())
            System.out.println("You have chosen to use BPNN (good!)");
    } else {
        System.err.println(
                "[WARNING] Was expecting BPNN as the base classifier (will set it now, with default parameters) ...");
        m_Classifier = new BPNN();
    }

    int i_Y = W.length - 1; // the final W
    W[i_Y] = RBM.makeW(W[i_Y].getRowDimension() - 1, W[i_Y].getColumnDimension() - 1, new Random(1)); // randomly re-initialize the final (output) layer
    ((BPNN) m_Classifier).presetWeights(W, L); // this W will be modified
    ((BPNN) m_Classifier).train(X_, Y_); // could also have called buildClassifier(D)

    /*
    for(int i = 0; i < 1000; i++) {
       double E = ((BPNN)m_Classifier).update(X_,Y_);
       //double Ypred[][] = ((BPNN)m_Classifier).popY(X_);
       System.out.println("i="+i+", MSE="+E);
    }
    */

    if (getDebug()) {
        Matrix tW[] = W;
        //System.out.println("X = \n"+M.toString(X_));
        System.out.println("W = \n" + MatrixUtils.toString(tW[0].getArray()));
        System.out.println("W = \n" + MatrixUtils.toString(tW[1].getArray()));
        double Ypred[][] = ((BPNN) m_Classifier).popY(X_);
        System.out.println("Y = \n" + MatrixUtils.toString(MatrixUtils.threshold(Ypred, 0.5)));
        //System.out.println("Z = \n"+M.toString(M.threshold(Z,0.5)));
    }
}

From source file:meka.classifiers.multilabel.Evaluation.java

License:Open Source License

/**
 * RunExperiment - Build and evaluate a model with command-line options.
 * @param   h      multi-label classifier
 * @param   options   command line options
 */
public static void runExperiment(MultiLabelClassifier h, String options[]) throws Exception {

    // Help
    if (Utils.getOptionPos('h', options) >= 0) {
        System.out.println("\nHelp requested");
        Evaluation.printOptions(h.listOptions());
        return;
    }

    h.setOptions(options);

    if (h.getDebug())
        System.out.println("Loading and preparing dataset ...");

    // Load Instances from a file
    Instances D_train = loadDataset(options);

    Instances D_full = D_train;

    // Try to extract and set the class index from the @relation name
    MLUtils.prepareData(D_train);

    // Override the number of classes with command-line option (optional)
    if (Utils.getOptionPos('C', options) >= 0) {
        int L = Integer.parseInt(Utils.getOption('C', options));
        D_train.setClassIndex(L);
    }

    // If we still haven't found the -C option, we can't continue (we don't know how many labels there are)
    int L = D_train.classIndex();
    if (L <= 0) {
        throw new Exception(
                "[Error] Number of labels not specified.\n\tYou must set the number of labels with the -C option, either inside the @relation tag of the Instances file, or on the command line.");
        // apparently the dataset didn't contain the '-C' flag, check in the command line options ...
    }

    // Randomize (Instances) 
    int seed = (Utils.getOptionPos('s', options) >= 0) ? Integer.parseInt(Utils.getOption('s', options)) : 0;
    if (Utils.getFlag('R', options)) {
        D_train.randomize(new Random(seed));
    }
    boolean Threaded = false;
    if (Utils.getOptionPos("Thr", options) >= 0) {
        Threaded = Utils.getFlag("Thr", options);
    }

    // Verbosity Option
    String voption = "1";
    if (Utils.getOptionPos("verbosity", options) >= 0) {
        voption = Utils.getOption("verbosity", options);
    }

    // Save for later?
    //String fname = null;
    //if (Utils.getOptionPos('f',options) >= 0) {
    //   fname = Utils.getOption('f',options);
    //}
    // Dump for later?
    String dname = null;
    if (Utils.getOptionPos('d', options) >= 0) {
        dname = Utils.getOption('d', options);
    }
    // Load from file?
    String lname = null;
    Instances dataHeader = null;
    if (Utils.getOptionPos('l', options) >= 0) {
        lname = Utils.getOption('l', options);
        Object[] data = SerializationHelper.readAll(lname);
        h = (MultiLabelClassifier) data[0];
        if (data.length > 1)
            dataHeader = (Instances) data[1];
        //Object o[] = SerializationHelper.readAll(lname);
        //h = (MultilabelClassifier)o[0];
    }

    try {

        Result r = null;

        // Threshold OPtion
        String top = "PCut1"; // default
        if (Utils.getOptionPos("threshold", options) >= 0)
            top = Utils.getOption("threshold", options);

        if (Utils.getOptionPos('x', options) >= 0) {
            // CROSS-FOLD-VALIDATION

            int numFolds = MLUtils.getIntegerOption(Utils.getOption('x', options), 10); // default 10
            // Check for remaining options
            Utils.checkForRemainingOptions(options);
            r = Evaluation.cvModel(h, D_train, numFolds, top, voption);
            System.out.println(r.toString());
        } else {
            // TRAIN-TEST SPLIT

            Instances D_test = null;

            if (Utils.getOptionPos('T', options) >= 0) {
                // load separate test set
                try {
                    D_test = loadDataset(options, 'T');
                    MLUtils.prepareData(D_test);
                } catch (Exception e) {
                    throw new Exception("[Error] Failed to Load Test Instances from file.", e);
                }
            } else {
                // split training set into train and test sets
                // default split
                int N_T = (int) (D_train.numInstances() * 0.60);
                if (Utils.getOptionPos("split-percentage", options) >= 0) {
                    // split by percentage
                    double percentTrain = Double.parseDouble(Utils.getOption("split-percentage", options));
                    N_T = (int) Math.round((D_train.numInstances() * (percentTrain / 100.0)));
                } else if (Utils.getOptionPos("split-number", options) >= 0) {
                    // split by number
                    N_T = Integer.parseInt(Utils.getOption("split-number", options));
                }

                int N_t = D_train.numInstances() - N_T;
                D_test = new Instances(D_train, N_T, N_t);
                D_train = new Instances(D_train, 0, N_T);

            }

            // Invert the split?
            if (Utils.getFlag('i', options)) { //boolean INVERT          = Utils.getFlag('i',options);
                Instances temp = D_test;
                D_test = D_train;
                D_train = temp;
            }

            // Check for remaining options
            Utils.checkForRemainingOptions(options);

            if (h.getDebug())
                System.out.println(":- Dataset -: " + MLUtils.getDatasetName(D_train) + "\tL=" + L
                        + "\tD(t:T)=(" + D_train.numInstances() + ":" + D_test.numInstances() + ")\tLC(t:T)="
                        + Utils.roundDouble(MLUtils.labelCardinality(D_train, L), 2) + ":"
                        + Utils.roundDouble(MLUtils.labelCardinality(D_test, L), 2) + ")");

            if (lname != null) {
                // h is already built, and loaded from a file, test it!
                r = testClassifier(h, D_test);

                String t = top;

                if (top.startsWith("PCut")) {
                    // if PCut is specified we need the training data,
                    // so that we can calibrate the threshold!
                    t = MLEvalUtils.getThreshold(r.predictions, D_train, top);
                }
                r = evaluateModel(h, D_test, t, voption);
            } else {
                //check if train and test set size are > 0
                if (D_train.numInstances() > 0 && D_test.numInstances() > 0) {
                    if (Threaded) {
                        r = evaluateModelM(h, D_train, D_test, top, voption);
                    } else {

                        r = evaluateModel(h, D_train, D_test, top, voption);
                    }
                } else {
                    // otherwise just train on full set. Maybe better throw an exception.
                    h.buildClassifier(D_full);

                }
            }

            // @todo, if D_train==null, assume h is already trained
            if (D_train.numInstances() > 0 && D_test.numInstances() > 0) {
                System.out.println(r.toString());
            }
        }

        // Save model to file?
        if (dname != null) {
            dataHeader = new Instances(D_train, 0);
            SerializationHelper.writeAll(dname, new Object[] { h, dataHeader });
        }

    } catch (Exception e) {
        e.printStackTrace();
        Evaluation.printOptions(h.listOptions());
        System.exit(1);
    }

    System.exit(0);
}

From source file:meka.classifiers.multilabel.Evaluation.java

License:Open Source License

/**
 * IsMT - see if dataset D is multi-target (else only multi-label)
 * @param   D   data
 * @return   true iff D is multi-target only (else false)
 */
public static boolean isMT(Instances D) {
    int L = D.classIndex();
    for (int j = 0; j < L; j++) {
        if (D.attribute(j).isNominal()) {
            // Classification
            if (D.attribute(j).numValues() > 2) {
                // Multi-class
                return true;
            }
        } else {
            // Regression?
            System.err.println("[Warning] Found a non-nominal class -- not sure how this happened?");
        }
    }
    return false;
}

From source file:meka.classifiers.multilabel.Evaluation.java

License:Open Source License

/**
 * CVModel - Split D into train/test folds, and then train and evaluate on each one.
 * @param   h          a multi-output classifier
 * @param   D          the full dataset Instances (split into train/test folds internally)
 * @param   numFolds   number of folds of CV
 * @param   top        Threshold OPtion (pertains to multi-label data only)
 * @param   vop        Verbosity OPtion (which measures do we want to calculate/output)
 * @return  Result     raw prediction data with evaluation statistics included.
 */
public static Result cvModel(MultiLabelClassifier h, Instances D, int numFolds, String top, String vop)
        throws Exception {
    Result r_[] = new Result[numFolds];
    for (int i = 0; i < numFolds; i++) {
        Instances D_train = D.trainCV(numFolds, i);
        Instances D_test = D.testCV(numFolds, i);
        if (h.getDebug())
            System.out.println(":- Fold [" + i + "/" + numFolds + "] -: " + MLUtils.getDatasetName(D) + "\tL="
                    + D.classIndex() + "\tD(t:T)=(" + D_train.numInstances() + ":" + D_test.numInstances()
                    + ")\tLC(t:T)=" + Utils.roundDouble(MLUtils.labelCardinality(D_train, D.classIndex()), 2)
                    + ":" + Utils.roundDouble(MLUtils.labelCardinality(D_test, D.classIndex()), 2) + ")");
        r_[i] = evaluateModel(h, D_train, D_test); // <-- should not run stats yet!
    }
    Result r = MLEvalUtils.combinePredictions(r_);
    if (h instanceof MultiTargetClassifier || isMT(D)) {
        r.setInfo("Type", "MT-CV");
    } else if (h instanceof MultiLabelClassifier) {
        r.setInfo("Type", "ML-CV");
        try {
            r.setInfo("Threshold", String.valueOf(Double.parseDouble(top)));
        } catch (Exception e) {
            System.err.println(
                    "[WARNING] Automatic threshold calibration not currently enabled for cross-fold validation, setting threshold = 0.5.\n");
            r.setInfo("Threshold", String.valueOf(0.5));
        }
    }
    r.setInfo("Verbosity", vop);
    r.output = Result.getStats(r, vop);
    // Need to reset this because of CV
    r.setValue("Number of training instances", D.numInstances());
    r.setValue("Number of test instances", D.numInstances());
    return r;
}
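
For completeness, a minimal calling sketch (h and D are placeholders for a MultiLabelClassifier and a dataset whose class index has already been set, e.g., via MLUtils.prepareData; "PCut1" and "1" mirror the defaults used in runExperiment above):

Result r = Evaluation.cvModel(h, D, 10, "PCut1", "1");
System.out.println(r.toString());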