Example usage for weka.core Instances remove

List of usage examples for weka.core Instances remove

Introduction

On this page you can find example usages of weka.core.Instances.remove.

Prototype



@Override
public Instance remove(int index) 

Source Link

Document

Removes the instance at the given position.

Usage

From source file:machinelearning_cw.MachineLearning_CW.java

/**
 * Tests the accuracy of a classifier against a collection of datasets
 * by resampling.
 * 
 * @param classifier The classifier to be tested
 * @param trainingDatasets A collection of Instances objects containing
 * the training data for different datasets.
 * @param testDatasets A collection of Instances objects containing
 * the test data for different datasets.
 * @param t The number of times the data should be sampled
 * @throws Exception if building or evaluating the classifier fails
 */
public static void performClassifierAccuracyTests(Classifier classifier, ArrayList<Instances> trainingDatasets,
        ArrayList<Instances> testDatasets, int t) throws Exception {
    Random randomGenerator = new Random();

    for (int i = 0; i < trainingDatasets.size(); i++) {
        Instances train = trainingDatasets.get(i);
        Instances test = testDatasets.get(i);

        /* Accuracies for THIS dataset only. A single list shared across
         * datasets would make every printed average include the scores
         * of all previous datasets. */
        ArrayList<Double> accuracies = new ArrayList<Double>();

        /* Test by Resampling. First, merge train and test data */
        for (int j = 0; j < t; j++) {

            Instances mergedDataSet = mergeDataSets(train, test);
            train.clear();
            test.clear();

            /* Randomly sample n instances from the merged dataset
             * (without replacement) to form the train set
             */
            int n = mergedDataSet.size() / 2;
            for (int k = 0; k < n; k++) {
                int indexToRemove = randomGenerator.nextInt(mergedDataSet.size());
                train.add(mergedDataSet.remove(indexToRemove));
            }

            /* Reserve ALL remaining data as test data. Calling
             * remove(k) while k advances against a shrinking list
             * skips every other instance, so drain from the front
             * until the merged set is empty instead. */
            while (!mergedDataSet.isEmpty()) {
                test.add(mergedDataSet.remove(0));
            }

            /* Train classifier. Recalculates k */
            classifier.buildClassifier(train);

            /* Measure and record the accuracy of the classifier on
             * the test set
             */
            double accuracy = Helpers.findClassifierAccuracy(classifier, test);
            accuracies.add(accuracy);
        }

        double accuracyAverage = average(accuracies);
        System.out.println(accuracyAverage);
    }

}

From source file:meka.classifiers.multilabel.cc.CNode.java

License:Open Source License

/**
 * Main - run some tests./*from  www .  ja v  a 2  s .  c o m*/
 */
public static void main(String args[]) throws Exception {
    Instances D = new Instances(new FileReader(args[0]));
    Instance x = D.lastInstance();
    D.remove(D.numInstances() - 1);
    int L = Integer.parseInt(args[1]);
    D.setClassIndex(L);
    double y[] = new double[L];
    Random r = new Random();
    int s[] = new int[] { 1, 0, 2 };
    int PA_J[][] = new int[][] { {}, {}, { 0, 1 }, };

    //MLUtils.randomize(s,r);
    // MUST GO IN TREE ORDER !!
    for (int j : s) {
        int pa_j[] = PA_J[j];
        System.out.println("PARENTS = " + Arrays.toString(pa_j));
        //MLUtils.randomize(pa_j,r);
        System.out.println("**** TRAINING ***");
        CNode n = new CNode(j, null, pa_j);
        n.build(D, new SMO());
        /*
         */
        //Instances D_ = n.transform(D);
        //n.T = D_;
        System.out.println("============== D_" + j + " / class = " + n.T.classIndex() + " =");
        System.out.println("" + n.T);
        System.out.println("**** TESTING ****");
        /*
        Instance x_ = MLUtils.setTemplate(x,(Instance)D_.firstInstance().copy(),D_);
        for(int pa : pa_j) {
           //System.out.println(""+map[pa]);
           x_.setValue(n.map[pa],y[pa]);
        }
        //x_.setDataset(T);
        x_.setClassMissing();
         */
        //n.T = D_;
        Instance x_ = n.transform(x, y);
        System.out.println("" + x_);
        y[j] = 1;
    }
}

From source file:net.sf.jclal.sampling.supervised.Resample.java

License:Open Source License

/**
 * Creates the subsample with replacement, optionally biasing the draw
 * toward a uniform class distribution.
 *
 * @param dataSet The dataset to extract a percent of instances
 * @param sampleSize the size to generate
 * @param actualClasses The actual classes
 * @param classIndices The indexes of the classes
 */
public void createSubsampleWithReplacement(WekaDataset dataSet, int sampleSize, int actualClasses,
        int[] classIndices) {

    final int originalSize = dataSet.getNumInstances();

    Set<Integer> drawnIndexes = new HashSet<Integer>();

    Instances labeled = new Instances(dataSet.getDataset(), sampleSize);

    for (int draw = 0; draw < sampleSize; draw++) {

        int pick = 0;

        if (getRandgen().uniform(0, 1) < biasToUniformClass) {
            // Pick a random class (of those classes that actually appear)
            int targetClass = getRandgen().choose(0, actualClasses);
            int nonEmptySeen = 0;
            for (int j = 0; j < classIndices.length - 1; j++) {
                // Only ranges where the boundaries differ contain instances.
                if (classIndices[j] != classIndices[j + 1] && nonEmptySeen++ >= targetClass) {
                    // Pick a random instance of the designated class
                    pick = classIndices[j] + getRandgen().choose(0, classIndices[j + 1] - classIndices[j]);
                    break;
                }
            }
        } else {
            pick = getRandgen().choose(0, originalSize);
        }

        labeled.add((Instance) dataSet.instance(pick).copy());
        drawnIndexes.add(pick);
    }

    setLabeledData(new WekaDataset(labeled));

    ArrayList<Container> orderedIndexes = new ArrayList<Container>();
    for (Integer idx : drawnIndexes) {
        orderedIndexes.add(new Container(idx, idx));
    }

    // Sort in descending order so that removing by index below never
    // shifts an index that is still pending removal.
    OrderUtils.mergeSort(orderedIndexes, true);

    // Copy the entire dataset into the unlabeled set...
    Instances unlabeled = new Instances(dataSet.getDataset());

    // ...then drop every instance that was drawn into the labeled sample.
    for (Container pair : orderedIndexes) {
        unlabeled.remove(Integer.parseInt(pair.getValue().toString()));
    }

    setUnlabeledData(new WekaDataset(unlabeled));

    // Clean up. NOTE(review): if WekaDataset stores a reference to these
    // Instances instead of copying them, clear() also empties the data
    // just handed to setLabeledData/setUnlabeledData — confirm intended.
    labeled.clear();
    unlabeled.clear();
    drawnIndexes.clear();
    orderedIndexes.clear();

    labeled = null;
    unlabeled = null;
    drawnIndexes = null;
    orderedIndexes = null;

}

From source file:net.sf.jclal.sampling.unsupervised.Resample.java

License:Open Source License

/**
 * Creates the subsample with replacement: draws {@code sampleSize}
 * instances uniformly at random (duplicates allowed) into the labeled
 * set, and places everything not drawn into the unlabeled set.
 *
 * @param dataSet The dataset to extract a percent of instances
 * @param sampleSize the size to generate
 */
public void createSubsampleWithReplacement(IDataset dataSet, int sampleSize) {

    final int originalSize = dataSet.getNumInstances();

    Set<Integer> drawnIndexes = new HashSet<Integer>();

    Instances labeled = new Instances(dataSet.getDataset(), sampleSize);

    // Fill the labeled set with uniform random draws.
    for (int draw = 0; draw < sampleSize; draw++) {
        int pick = getRandgen().choose(0, originalSize);
        labeled.add((Instance) dataSet.instance(pick).copy());
        drawnIndexes.add(pick);
    }

    if (dataSet instanceof WekaDataset) {
        setLabeledData(new WekaDataset(labeled));
    }

    if (dataSet instanceof MulanDataset) {
        setLabeledData(new MulanDataset(labeled, ((MulanDataset) dataSet).getLabelsMetaData()));
    }

    ArrayList<Container> orderedIndexes = new ArrayList<Container>();
    for (Integer idx : drawnIndexes) {
        orderedIndexes.add(new Container(idx, idx));
    }

    // Sort in descending order so that removing by index below never
    // shifts an index that is still pending removal.
    OrderUtils.mergeSort(orderedIndexes, true);

    // Copy the entire dataset into the unlabeled set...
    Instances unlabeled = new Instances(dataSet.getDataset());

    // ...then drop every instance that was drawn into the labeled sample.
    for (Container pair : orderedIndexes) {
        unlabeled.remove(Integer.parseInt(pair.getValue().toString()));
    }

    if (dataSet instanceof WekaDataset) {
        setUnlabeledData(new WekaDataset(unlabeled));
    }

    if (dataSet instanceof MulanDataset) {
        setUnlabeledData(new MulanDataset(unlabeled, ((MulanDataset) dataSet).getLabelsMetaData()));
    }

    // Clean up. NOTE(review): if the dataset wrappers store a reference
    // to these Instances instead of copying them, clear() also empties
    // the data just handed to the setters — confirm intended.
    unlabeled.clear();
    labeled.clear();

    unlabeled = null;
    labeled = null;

    drawnIndexes.clear();
    orderedIndexes.clear();

    drawnIndexes = null;
    orderedIndexes = null;
}

From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java

License:Open Source License

/**
 * Clusters {@code data} with SimpleKMeans (class attribute filtered out)
 * and returns the instances whose k-means cluster agrees with the cluster
 * that the evaluation maps their true class to.
 *
 * @param data the dataset to cluster; its class index is set to the last attribute
 * @return a new Instances containing only the consistent instances
 * @throws NullPointerException if data is null
 */
public Instances clusteredInstances(Instances data) {
    if (data == null) {
        throw new NullPointerException("Data is null at clusteredInstances method");
    }
    // Start from an EMPTY copy of data's header. The original aliased
    // `data` (sampled_data = data) and removed with remove(i) while i
    // advanced, which skips every other instance AND destroys the input.
    Instances sampled_data = new Instances(data, 0);

    SimpleKMeans sKmeans = new SimpleKMeans();
    data.setClassIndex(data.numAttributes() - 1);
    Remove filter = new Remove();
    filter.setAttributeIndices("" + (data.classIndex() + 1));
    List<Integer> assignments = new ArrayList<Integer>();

    try {
        filter.setInputFormat(data);
        Instances dataClusterer = Filter.useFilter(data, filter);
        String[] options = new String[3];
        options[0] = "-I"; // max. iterations
        options[1] = "500";
        options[2] = "-O"; // preserve instance order so assignments line up
        sKmeans.setNumClusters(data.numClasses());
        sKmeans.setOptions(options);
        sKmeans.buildClusterer(dataClusterer);
        System.out.println("Kmeans\n:" + sKmeans);
        System.out.println(Arrays.toString(sKmeans.getAssignments()));
        // Box the per-instance cluster assignments one by one.
        // Arrays.asList(int[]) would produce a single-element
        // List<int[]>, not a List<Integer>, so get(i)/equals never work.
        for (int assignment : sKmeans.getAssignments()) {
            assignments.add(assignment);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Assignments\n: " + assignments);
    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(sKmeans);
    try {
        eval.evaluateClusterer(data);
    } catch (Exception e) {
        e.printStackTrace();
    }
    int classesToClustersMap[] = eval.getClassesToClusters();
    // Keep every instance whose k-means cluster matches the cluster its
    // true class maps to. Iterate over the instances (assignments.size())
    // rather than over the shorter classes-to-clusters array.
    for (int i = 0; i < assignments.size(); i++) {
        if (assignments.get(i) == classesToClustersMap[(int) data.get(i).classValue()]) {
            sampled_data.add(data.get(i));
        }
    }
    return sampled_data;
}

From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java

License:Open Source License

/**
 * Clusters the given data with XMeans (class attribute filtered out) and
 * returns a new Instances holding only those instances whose assigned
 * cluster matches the cluster that the evaluation maps their class to.
 *
 * @param data the dataset to cluster; its class index is set to the last attribute
 * @return the selected instances with the same schema as {@code data}
 * @throws NullPointerException if data is null
 */
public static Instances clusterInstances(Instances data) {
    XMeans clusterer = new XMeans();
    Remove classRemover = new Remove();
    Instances strippedData = null;
    if (data == null) {
        throw new NullPointerException("Data is null at clusteredInstances method");
    }

    // Collect the attributes so an empty result set with the same
    // schema can be constructed.
    ArrayList<Attribute> schema = new ArrayList<Attribute>();
    Enumeration attributes = data.enumerateAttributes();
    while (attributes.hasMoreElements()) {
        schema.add((Attribute) attributes.nextElement());
    }

    Instances selected = new Instances(data.relationName(), schema, 0);
    data.setClassIndex(data.numAttributes() - 1);
    selected.setClassIndex(data.numAttributes() - 1);
    classRemover.setAttributeIndices("" + (data.classIndex() + 1));
    // In the Wavelet Stream of MOA the first element always arrives
    // without a class, so drop it.
    data.remove(0);

    try {
        classRemover.setInputFormat(data);
        strippedData = Filter.useFilter(data, classRemover);
        String[] options = new String[4];
        options[0] = "-L"; // max. iterations
        options[1] = Integer.toString(noOfClassesInPool - 1);
        if (noOfClassesInPool > 2) {
            options[1] = Integer.toString(noOfClassesInPool - 1);
            clusterer.setMinNumClusters(noOfClassesInPool - 1);
        } else {
            options[1] = Integer.toString(noOfClassesInPool);
            clusterer.setMinNumClusters(noOfClassesInPool);
        }
        clusterer.setMaxNumClusters(data.numClasses() + 1);
        System.out.println("No of classes in the pool: " + noOfClassesInPool);
        clusterer.setUseKDTree(true);
        clusterer.buildClusterer(strippedData);
        System.out.println("Xmeans\n:" + clusterer);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ClusterEvaluation evaluation = new ClusterEvaluation();
    evaluation.setClusterer(clusterer);
    try {
        evaluation.evaluateClusterer(data);
        int classesToClusters[] = evaluation.getClassesToClusters();
        for (int i = 0; i < data.size(); i++) {
            int assignedCluster = clusterer.clusterInstance(strippedData.get(i));
            // Keep the instance only when its class value and the
            // class mapped to its cluster agree.
            if ((int) data.get(i).classValue() == classesToClusters[assignedCluster]) {
                selected.add(data.get(i));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return selected;
}