Example usage for weka.core Instances instance

List of usage examples for weka.core Instances instance

Introduction

On this page you can find an example usage for weka.core Instances instance.

Prototype



public Instance instance(int index) 

Source Link

Document

Returns the instance at the given position.

Usage

From source file:meka.classifiers.multilabel.incremental.IncrementalEvaluation.java

License:Open Source License

/**
 * EvaluateModelBatchWindow - Evaluate a multi-label data-stream model over windows.
 * @param   h   Multilabel Classifier/*from   w  w w .  j av  a2  s. co  m*/
 * @param    D   stream
 * @param   numWindows   number of windows
 * @param   rLabeled   labelled-ness (1.0 by default)
 * @param   Top   threshold option
 * @param   Vop   verbosity option
 * @return   The Result on the final window (but it contains samples of all the other evaluated windows).
 * The window is sampled every N/numWindows instances, for a total of numWindows windows.
 */
public static Result evaluateModelBatchWindow(MultiLabelClassifier h, Instances D, int numWindows,
        double rLabeled, String Top, String Vop) throws Exception {

    if (h.getDebug())
        System.out
                .println(":- Classifier -: " + h.getClass().getName() + ": " + Arrays.toString(h.getOptions()));

    int N = D.numInstances();
    int L = D.classIndex();

    // the Result to use
    Result result = null;
    // the samples of all windows
    ArrayList<HashMap<String, Object>> samples = new ArrayList<HashMap<String, Object>>();

    long train_time = 0;
    long test_time = 0;

    int windowSize = (int) Math.floor(D.numInstances() / (double) numWindows);

    if (rLabeled * windowSize < 1.)
        throw new Exception("[Error] The ratio of labelled instances (" + rLabeled
                + ") is too small given the window size!");

    double nth = 1. / rLabeled; // label every nth example

    Instances D_init = new Instances(D, 0, windowSize); // initial window

    if (h.getDebug()) {
        System.out.println("Training classifier on initial window ...");
    }
    train_time = System.currentTimeMillis();
    h.buildClassifier(D_init); // initial classifier
    train_time = System.currentTimeMillis() - train_time;
    if (h.getDebug()) {
        System.out.println("Done (in " + (train_time / 1000.0) + " s)");
    }
    D = new Instances(D, windowSize, D.numInstances() - windowSize); // the rest (after the initial window)

    double t[] = new double[L];
    Arrays.fill(t, 0.5);

    int V = MLUtils.getIntegerOption(Vop, 3);
    if (h.getDebug()) {
        System.out.println("--------------------------------------------------------------------------------");
        System.out.print("#" + Utils.padLeft("w", 6) + " " + Utils.padLeft("n", 6));
        for (String m : measures) {
            System.out.print(" ");
            System.out.print(Utils.padLeft(m, 12));
        }
        System.out.println("");
        System.out.println("--------------------------------------------------------------------------------");
    }

    int i = 0;
    for (int w = 0; w < numWindows - 1; w++) {
        // For each evaluation window ...

        result = new Result(L);
        result.setInfo("Supervision", String.valueOf(rLabeled));
        result.setInfo("Type", "MLi");

        int n = 0;
        test_time = 0;
        train_time = 0;

        for (int c = 0; i < (w * windowSize) + windowSize; i++) {
            // For each instance in the evaluation window ...

            Instance x = D.instance(i);
            AbstractInstance x_ = (AbstractInstance) ((AbstractInstance) x).copy(); // copy 
            // (we can't clear the class values because certain classifiers need to know how well they're doing -- just trust that there's no cheating!)
            //for(int j = 0; j < L; j++)  
            //   x_.setValue(j,0.0);

            if (rLabeled < 0.5 && (i % (int) (1 / rLabeled) == 0)
                    || (rLabeled >= 0.5 && (i % (int) (1. / (1. - rLabeled)) != 0))) {
                // LABELLED - Test & record prediction 
                long before_test = System.currentTimeMillis();
                double y[] = h.distributionForInstance(x_);
                long after_test = System.currentTimeMillis();
                test_time += (after_test - before_test); // was +=
                result.addResult(y, x);
                n++;
            } else {
                // UNLABELLED
                x = MLUtils.setLabelsMissing(x, L);
            }

            // Update the classifier. (The classifier will have to decide if it wants to deal with unlabelled instances.)
            long before = System.currentTimeMillis();
            ((UpdateableClassifier) h).updateClassifier(x);
            long after = System.currentTimeMillis();
            train_time += (after - before); // was +=
        }

        // calculate results
        result.setInfo("Threshold", Arrays.toString(t));
        result.output = Result.getStats(result, Vop);
        result.setMeasurement("Test time", (test_time) / 1000.0);
        result.setMeasurement("Build time", (train_time) / 1000.0);
        result.setMeasurement("Total time", (test_time + train_time) / 1000.0);
        result.setMeasurement("Threshold", (double) t[0]);
        result.setMeasurement("Instances", (double) i);
        result.setMeasurement("Samples", (double) (samples.size() + 1));
        samples.add(result.output);

        // Display results (to CLI)
        if (h.getDebug()) {
            System.out.print("#" + Utils.doubleToString((double) w + 1, 6, 0) + " "
                    + Utils.doubleToString((double) n, 6, 0));
            n = 0;
            for (String m : measures) {
                System.out.print(" ");
                System.out.print(Utils.doubleToString((Double) result.getMeasurement(m), 12, 4));
            }
            System.out.println("");
        }

        // Calibrate threshold for next window
        if (Top.equals("PCutL")) {
            t = ThresholdUtils.calibrateThresholds(result.predictions,
                    MLUtils.labelCardinalities(result.actuals));
        } else {
            Arrays.fill(t, ThresholdUtils.calibrateThreshold(result.predictions,
                    MLUtils.labelCardinality(result.allTrueValues())));
        }

    }

    if (h.getDebug()) {
        System.out.println("--------------------------------------------------------------------------------");
    }

    // This is the last Result; prepare it for evaluation output.
    result.setInfo("Classifier", h.getClass().getName());
    result.vals.put("Test time", (test_time) / 1000.0);
    result.vals.put("Build time", (train_time) / 1000.0);
    result.vals.put("Total time", (test_time + train_time) / 1000.0);
    result.vals.put("Total instances tested", (double) i);
    result.vals.put("Initial instances for training", (double) windowSize);
    result.setInfo("Options", Arrays.toString(h.getOptions()));
    result.setInfo("Additional Info", h.toString());
    result.setInfo("Dataset", MLUtils.getDatasetName(D));
    result.output = Result.getStats(result, Vop);
    result.setMeasurement("Results sampled over time", Result.getResultsAsInstances(samples));

    return result;
}

From source file:meka.classifiers.multilabel.incremental.IncrementalEvaluation.java

License:Open Source License

/**
 * Prequential Evaluation - Accuracy since the start of evaluation.
 * Each instance (after the initial training window) is first tested on, then used to
 * update the model; statistics are sampled every windowSize instances.
 * @param   h   Multilabel Classifier (must also implement UpdateableClassifier)
 * @param   D   stream
 * @param   windowSize   sampling frequency (of evaluation statistics)
 * @param   rLabeled   labelled-ness (1.0 by default)
 * @param   Top   threshold option
 * @param   Vop   verbosity option
 * @return  the Result accumulated over the whole stream (after the initial window)
 */
public static Result evaluateModelPrequentialBasic(MultiLabelClassifier h, Instances D, int windowSize,
        double rLabeled, String Top, String Vop) throws Exception {

    if (h.getDebug())
        System.out
                .println(":- Classifier -: " + h.getClass().getName() + ": " + Arrays.toString(h.getOptions()));

    int L = D.classIndex();

    Result result = new Result();

    long train_time = 0;
    long test_time = 0;

    result.setInfo("Supervision", String.valueOf(rLabeled));

    Instances D_init = new Instances(D, 0, windowSize); // initial window

    if (h.getDebug()) {
        System.out.println("Training classifier on initial window (of size " + windowSize + ") ...");
    }

    train_time = System.currentTimeMillis();
    h.buildClassifier(D_init); // initial classifier
    train_time = System.currentTimeMillis() - train_time;

    D = new Instances(D, windowSize, D.numInstances() - windowSize); // the rest (after the initial window)

    if (h.getDebug()) {
        System.out.println(
                "Proceeding to Test/Label/Update cycle on remaining (" + D.numInstances() + ") instances ...");
    }

    result.setInfo("Classifier", h.getClass().getName());
    result.setInfo("Options", Arrays.toString(h.getOptions()));
    result.setInfo("Additional Info", h.toString());
    result.setInfo("Dataset", MLUtils.getDatasetName(D));
    result.setInfo("Verbosity", Vop);
    if (h instanceof MultiTargetClassifier || Evaluation.isMT(D)) {
        result.setInfo("Type", "MT");
    } else {
        result.setInfo("Type", "ML");
        // Only a single, fixed threshold is supported here (default 0.5).
        double t = 0.5;
        try {
            t = Double.parseDouble(Top);
        } catch (Exception e) {
            System.err.println(
                    "[WARNING] Only a single threshold can be chosen for this kind of evaluation; Using " + t);
        }
        result.setInfo("Threshold", String.valueOf(t));
    }
    ArrayList<HashMap<String, Object>> samples = new ArrayList<HashMap<String, Object>>();

    for (int i = 0; i < D.numInstances(); i++) {

        Instance x = D.instance(i);
        AbstractInstance x_ = (AbstractInstance) ((AbstractInstance) x).copy(); // copy 

        /*
         * TEST
         */
        long before_test = System.currentTimeMillis();
        double y[] = h.distributionForInstance(x_);
        long after_test = System.currentTimeMillis();
        test_time += (after_test - before_test);
        result.addResult(y, x);

        /*
         * LABEL BECOMES AVAILABLE ?
         * NOTE(review): labels are hidden exactly when rLabeled >= 0.5, which looks
         * inverted relative to evaluateModelBatchWindow's labelled/unlabelled split --
         * confirm this is intended before relying on rLabeled here.
         */
        if (rLabeled >= 0.5) {
            x = MLUtils.setLabelsMissing(x, L);
        }

        /*
         * UPDATE
         * (The classifier will have to decide if it wants to deal with unlabelled instances.)
         */
        long before = System.currentTimeMillis();
        ((UpdateableClassifier) h).updateClassifier(x);
        long after = System.currentTimeMillis();
        train_time += (after - before);

        /*
         * RECORD MEASUREMENT
         * (once per windowSize instances)
         */
        if (i % windowSize == (windowSize - 1)) {
            HashMap<String, Object> eval_sample = Result.getStats(result, Vop);
            eval_sample.put("Test time", (test_time) / 1000.0);
            eval_sample.put("Build time", (train_time) / 1000.0);
            eval_sample.put("Total time", (test_time + train_time) / 1000.0);
            eval_sample.put("Instances", (double) i);
            eval_sample.put("Samples", (double) (samples.size() + 1));
            samples.add(eval_sample);
            System.out.println("Sample (#" + samples.size() + ") of performance at " + i + "/"
                    + D.numInstances() + " instances.");
        }

    }

    result.output = Result.getStats(result, Vop);
    result.setMeasurement("Results sampled over time", Result.getResultsAsInstances(samples));

    result.vals.put("Test time", (test_time) / 1000.0);
    result.vals.put("Build time", (train_time) / 1000.0);
    result.vals.put("Total time", (test_time + train_time) / 1000.0);

    return result;
}

From source file:meka.classifiers.multilabel.MajorityLabelset.java

License:Open Source License

/**
 * Builds the majority-labelset model by tallying the labelset of every
 * training instance.
 *
 * @param D the multi-label training data (labels are the first classIndex() attributes)
 * @throws Exception if the capability test fails
 */
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    int numLabels = D.classIndex();
    this.prediction = new double[numLabels];

    // Count each instance's labelset.
    int total = D.numInstances();
    for (int idx = 0; idx < total; idx++) {
        updateCount(D.instance(idx), numLabels);
    }

}

From source file:meka.classifiers.multilabel.Maniac.java

License:Open Source License

/**
 * Maps an instance into the compressed (pseudo-label) space used by the
 * internal model: the instance's feature part is merged with a one-instance
 * pseudo-label part whose label values are all set missing.
 *
 * @param x the instance to transform
 * @return the transformed instance (missing pseudo-labels + original features)
 * @throws Exception if the feature part cannot be extracted
 */
@Override
public Instance transformInstance(Instance x) throws Exception {

    // Wrap x in a single-instance dataset so extractPart() can operate on it.
    Instances wrapper = new Instances(x.dataset());
    wrapper.delete();
    wrapper.add(x);

    Instances featurePart = this.extractPart(wrapper, false);

    // Build a one-instance dataset in the compressed label space ...
    Instances labelPart = new Instances(this.compressedTemplateInst);
    Instance template = labelPart.instance(0);
    labelPart.delete();
    labelPart.add(template);

    // ... with every pseudo-label marked missing.
    for (int j = 0; j < labelPart.classIndex(); j++) {
        labelPart.instance(0).setMissing(j);
    }

    Instances merged = Instances.mergeInstances(labelPart, featurePart);
    merged.setClassIndex(labelPart.numAttributes());

    return merged.instance(0);
}

From source file:meka.classifiers.multilabel.meta.BaggingML.java

License:Open Source License

/**
 * Builds the ensemble by bagging: each member is trained on a bootstrap
 * resample of the training data, with duplicates encoded as instance weights.
 *
 * @param train the multi-label training data
 * @throws Exception if any ensemble member fails to build
 */
@Override
public void buildClassifier(Instances train) throws Exception {
    testCapabilities(train);

    if (getDebug()) {
        System.out.print("-: Models: ");
    }

    // Work on a copy so weight changes never touch the caller's data.
    train = new Instances(train);
    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier,
            m_NumIterations);

    for (int m = 0; m < m_NumIterations; m++) {
        Random rng = new Random(m_Seed + m);
        Instances bag = new Instances(train, 0);
        if (m_Classifiers[m] instanceof Randomizable) {
            ((Randomizable) m_Classifiers[m]).setSeed(m_Seed + m);
        }
        if (getDebug()) {
            System.out.print("" + m + " ");
        }

        // Bootstrap sample: first count how often each index is drawn ...
        int counts[] = new int[train.numInstances()];
        for (int k = 0; k < counts.length; k++) {
            counts[rng.nextInt(counts.length)]++;
        }
        // ... then add each drawn instance once, weighted by its draw count.
        for (int k = 0; k < counts.length; k++) {
            if (counts[k] > 0) {
                Instance picked = train.instance(k);
                picked.setWeight(counts[k]);
                bag.add(picked);
            }
        }

        m_Classifiers[m].buildClassifier(bag);
    }
    if (getDebug()) {
        System.out.println(":-");
    }
}

From source file:meka.classifiers.multilabel.meta.BaggingMLdup.java

License:Open Source License

/**
 * Builds the ensemble by bagging with duplicates: each member is trained on a
 * bag filled by sampling instances with replacement until it reaches
 * m_BagSizePercent of the original training-set size.
 *
 * @param train the multi-label training data
 * @throws Exception if any ensemble member fails to build
 */
@Override
public void buildClassifier(Instances train) throws Exception {
    testCapabilities(train);

    if (getDebug()) {
        System.out.print("-: Models: ");
    }

    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier,
            m_NumIterations);

    for (int m = 0; m < m_NumIterations; m++) {
        Random rng = new Random(m_Seed + m);
        Instances bag = new Instances(train, 0);
        if (m_Classifiers[m] instanceof Randomizable) {
            ((Randomizable) m_Classifiers[m]).setSeed(m_Seed + m);
        }
        if (getDebug()) {
            System.out.print("" + m + " ");
        }

        // Sample with replacement until the bag is full.
        int targetSize = (m_BagSizePercent * train.numInstances() / 100);
        while (bag.numInstances() < targetSize) {
            bag.add(train.instance(rng.nextInt(train.numInstances())));
        }
        m_Classifiers[m].buildClassifier(bag);
    }
    if (getDebug()) {
        System.out.println(":-");
    }
}

From source file:meka.classifiers.multilabel.meta.MBR.java

License:Open Source License

/**
 * Builds the two-layer MBR (meta-BR / 2BR) model: a base BR is trained on the
 * data, then its per-label confidence outputs are appended to each instance as
 * extra binary meta attributes, and a second BR is trained on that augmented
 * data.
 *
 * @param data the multi-label training data
 * @throws Exception if either BR layer fails to build
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    testCapabilities(data);

    int c = data.classIndex();

    // Layer 1: the base BR model.
    if (getDebug())
        System.out.println("Build BR Base (" + c + " models)");
    m_BASE = (BR) AbstractClassifier.forName(getClassifier().getClass().getName(),
            ((AbstractClassifier) getClassifier()).getOptions());
    m_BASE.buildClassifier(data);

    // Layer 2: augment the data with the base model's confidence outputs.
    if (getDebug())
        System.out.println("Prepare Meta data           ");
    Instances metaData = new Instances(data);

    FastVector binaryValues = new FastVector(c);
    binaryValues.addElement("0");
    binaryValues.addElement("1");

    // Insert c binary meta attributes (each inserted at index c, so they end
    // up in reverse naming order -- preserved from the original behavior).
    for (int j = 0; j < c; j++)
        metaData.insertAttributeAt(new Attribute("metaclass" + j, binaryValues), c);

    // Fill the meta attributes with the base model's per-label confidences.
    for (int n = 0; n < data.numInstances(); n++) {
        double conf[] = m_BASE.distributionForInstance(data.instance(n));
        for (int a = 0; a < conf.length; a++)
            metaData.instance(n).setValue(a + c, conf[a]);
    }

    metaData.setClassIndex(c);
    m_InstancesTemplate = new Instances(metaData, 0);

    if (getDebug())
        System.out.println("Build BR Meta (" + c + " models)");

    m_META = (BR) AbstractClassifier.forName(getClassifier().getClass().getName(),
            ((AbstractClassifier) getClassifier()).getOptions());
    m_META.buildClassifier(metaData);
}

From source file:meka.classifiers.multilabel.meta.RandomSubspaceML.java

License:Open Source License

/**
 * Builds an ensemble of multi-label classifiers, each trained on a random
 * subsample of the instances AND a random subspace of the non-label
 * attributes. Per-member kept-attribute indices and instance/dataset
 * templates are stored for use at prediction time.
 *
 * @param D the multi-label training data (labels are the first L attributes);
 *          NOTE(review): D is shuffled in place by randomize() below -- side
 *          effect on the caller's dataset.
 * @throws Exception if any ensemble member fails to build
 */
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    m_InstancesTemplates = new Instances[m_NumIterations];
    m_InstanceTemplates = new Instance[m_NumIterations];

    if (getDebug())
        System.out.println("-: Models: ");

    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier,
            m_NumIterations);

    Random r = new Random(m_Seed);

    // number of instances per bag
    int N_sub = (D.numInstances() * m_BagSizePercent / 100);

    int L = D.classIndex(); // number of labels
    int d = D.numAttributes() - L; // number of non-label attributes
    int d_new = d * m_AttSizePercent / 100; // attributes kept per subspace
    m_IndicesCut = new int[m_NumIterations][];

    for (int i = 0; i < m_NumIterations; i++) {

        // Downsize the instance space (exactly like in EnsembleML.java)

        if (getDebug())
            System.out.print("\t" + (i + 1) + ": ");
        D.randomize(r); // in-place shuffle of the caller's D (see javadoc note)
        Instances D_cut = new Instances(D, 0, N_sub);
        if (getDebug())
            System.out.print("N=" + D.numInstances() + " -> N'=" + D_cut.numInstances() + ", ");

        // Downsize attribute space

        D_cut.setClassIndex(-1);
        // Candidate attribute indices [L, d+L): all non-label attributes.
        int indices_a[] = A.make_sequence(L, d + L);
        A.shuffle(indices_a, r);
        // Keep the first (d - d_new) shuffled indices as the set to REMOVE.
        indices_a = Arrays.copyOfRange(indices_a, 0, d - d_new);
        Arrays.sort(indices_a);
        // Invert to get the indices KEPT (labels included).
        m_IndicesCut[i] = A.invert(indices_a, D.numAttributes());
        D_cut = F.remove(D_cut, indices_a, false);
        D_cut.setClassIndex(L);
        if (getDebug())
            System.out.print(" A:=" + (D.numAttributes() - L) + " -> A'=" + (D_cut.numAttributes() - L) + " ("
                    + m_IndicesCut[i][L] + ",...," + m_IndicesCut[i][m_IndicesCut[i].length - 1] + ")");

        // Train multi-label classifier

        if (m_Classifiers[i] instanceof Randomizable)
            ((Randomizable) m_Classifiers[i]).setSeed(m_Seed + i);
        if (getDebug())
            System.out.println(".");

        m_Classifiers[i].buildClassifier(D_cut);
        // NOTE(review): the SECOND instance (index 1) is kept as template, not
        // index 0 -- confirm this is intended.
        m_InstanceTemplates[i] = D_cut.instance(1);
        m_InstancesTemplates[i] = new Instances(D_cut, 0);
    }
    if (getDebug())
        System.out.println(":-");
}

From source file:meka.classifiers.multilabel.meta.SubsetMapper.java

License:Open Source License

/**
 * Registers every labelset observed in the training data in m_Count
 * (initialised to zero), then builds the internal classifier on the full data.
 *
 * @param D the multi-label training data
 * @throws Exception if the internal classifier fails to build
 */
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    // Key each training labelset by its bit-string representation.
    int total = D.numInstances();
    for (int idx = 0; idx < total; idx++) {
        m_Count.put(MLUtils.toBitString(D.instance(idx), D.classIndex()), 0);
    }

    m_Classifier.buildClassifier(D);

}

From source file:meka.classifiers.multilabel.MLCBMaD.java

License:Open Source License

@Override
public Instance transformInstance(Instance x) throws Exception {
    Instances tmpInst = new Instances(x.dataset());

    tmpInst.delete();/*w w  w .  jav  a  2 s.  c  o  m*/
    tmpInst.add(x);

    Instances features = this.extractPart(tmpInst, false);

    Instances pseudoLabels = new Instances(this.compressedMatrix);
    Instance tmpin = pseudoLabels.instance(0);
    pseudoLabels.delete();

    pseudoLabels.add(tmpin);

    for (int i = 0; i < pseudoLabels.classIndex(); i++) {
        pseudoLabels.instance(0).setMissing(i);
    }

    Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
    newDataSet.setClassIndex(this.size);

    return newDataSet.instance(0);
}