Example usage for weka.core Instances delete

List of usage examples for weka.core Instances delete

Introduction

On this page you can find example usage for weka.core Instances delete.

Prototype


public void delete(int index) 

Document

Removes an instance at the given position from the set.
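
Before the usage examples, here is a minimal, self-contained sketch of the call in isolation; the ARFF file name is only a placeholder. Note that delete(int) shifts every later instance one position to the left, which is why several of the examples below adjust their loop index after a deletion.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class DeleteDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // hypothetical file
        System.out.println("before: " + data.numInstances());
        data.delete(0); // remove the instance at index 0; later instances shift left
        System.out.println("after:  " + data.numInstances());
    }
}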

Usage

From source file:gr.auth.ee.lcs.utilities.InstancesUtility.java

License:Open Source License

public static void splitDatasetIntoFolds(final AbstractLearningClassifierSystem lcs, final Instances dataset,
        final int numberOfFolds) throws Exception {

    Instances[] partitions = InstancesUtility.partitionInstances(lcs, dataset);

    testInstances.setSize(partitions.length);
    trainInstances.setSize(partitions.length);

    int lowerBound = (int) Math.floor((double) dataset.numInstances() / (double) numberOfFolds);
    int upperBound = (int) Math.ceil((double) dataset.numInstances() / (double) numberOfFolds);

    // we demand lowerBound <= numberOfTestInstancesPerFold[i] <= upperBound
    int[] numberOfTestInstancesPerFold = new int[numberOfFolds];

    /*
     * Let X partitions have partitions[i].numInstances() > numberOfFolds.
     * Then, after the call of splitPartitionIntoFolds(), the vectors
     * testInstances and trainInstances will hold X arrays, meaning X elements.
     */
    Vector<Integer> vectorOfPartitionIndices = new Vector<Integer>();

    for (int i = 0; i < partitions.length; i++) {

        if (partitions[i].numInstances() > numberOfFolds) {
            InstancesUtility.splitPartitionIntoFolds(partitions[i], numberOfFolds, i);
            vectorOfPartitionIndices.add(i);
        } else {

            Instances[] emptyArrayTest = new Instances[numberOfFolds];
            Instances[] emptyArrayTrain = new Instances[numberOfFolds];

            for (int j = 0; j < numberOfFolds; j++) {
                emptyArrayTest[j] = new Instances(partitions[0], partitions[i].numInstances());
                emptyArrayTrain[j] = new Instances(partitions[0], partitions[i].numInstances());

            }
            //placeholders
            InstancesUtility.testInstances.add(i, emptyArrayTest);
            InstancesUtility.trainInstances.add(i, emptyArrayTrain);
        }
    }

    /*
     * At this point all partitions with numInstances > numberOfFolds have been
     * successfully split. What is left is splitting the leftovers: first those
     * from the partitions above, and second those from the partitions that
     * originally had numInstances < numberOfFolds.
     */

    for (int i = 0; i < numberOfFolds; i++) {
        int instancesSum = 0;
        for (int j = 0; j < vectorOfPartitionIndices.size(); j++) {
            instancesSum += InstancesUtility.testInstances.elementAt(vectorOfPartitionIndices.elementAt(j))[i]
                    .numInstances();
        }

        // initial number of instances in test set per fold
        numberOfTestInstancesPerFold[i] = instancesSum;
    }

    /*
     *  i = 0 |_0|_0|_0|_0|_0|_0|_0|_0|_0|_0|
     *  i = 1 |_0|_0|_0|_0|_0|_0|_0|_0|_0|_0|
     *  i = 2 |_0|_0|_0|_0|_0|_0|_0|_0|_0|_0|
     *  i = 3 |_0|_0|_0|_0|_0|_0|_0|_0|_0|_0|
     *  i = 4 |_0|_0|_0|_0|_0|_0|_0|_0|_0|_0|
     *  i = 5 |_1|_1|_1|_1|_1|_1|_1|_1|_1|_1|
     *  i = 6 |_3|_3|_3|_3|_3|_3|_3|_3|_3|_3|
     *  i = 7 |_6|_6|_6|_6|_6|_6|_6|_6|_6|_6|
     */

    for (int i = 0; i < partitions.length; i++) {

        int numberOfLeftoverInstances = partitions[i].numInstances() % numberOfFolds; // e.g. 64 % 10 = 4
        Instances leftoverInstances = new Instances(partitions[i], numberOfLeftoverInstances);

        if (numberOfLeftoverInstances > 0) {
            /*
             * Starting from the end: the leftovers are the last {numberOfLeftoverInstances}
             * instances of each partition that splitPartitionIntoFolds() has been called on.
             */
            for (int k = partitions[i].numInstances() - 1; k >= partitions[i].numInstances()
                    - numberOfLeftoverInstances; k--) {
                leftoverInstances.add(partitions[i].instance(k));
            }

            /*
             * For each partition, randomize the fold order. Leftover instances will be
             * placed in the first {numberOfLeftoverInstances} folds of that random order.
             * Without the shuffle the distribution would be uneven: the first folds
             * would keep receiving instances of the first partitions, and so on.
             */

            ArrayList<Integer> folds = new ArrayList<Integer>();

            for (int k = 0; k < numberOfFolds; k++) {
                folds.add(k);
            }

            Collections.shuffle(folds);

            int j = 0;
            while (leftoverInstances.numInstances() > 0) {
                int foldIndex = folds.get(j);

                if (numberOfTestInstancesPerFold[foldIndex] < upperBound) {

                    Instance toBeAdded = leftoverInstances.instance(0);

                    // place the leftover first instance in a test set
                    testInstances.elementAt(i)[foldIndex].add(toBeAdded);

                    numberOfTestInstancesPerFold[foldIndex]++;

                    // an instance placed in the test set of the current fold must be
                    // put in the train set of every other fold
                    for (int k = 0; k < numberOfFolds; k++) {
                        if (k != foldIndex) {
                            trainInstances.elementAt(i)[k].add(toBeAdded);
                        }
                    }

                    // remove the instance placed in the test set
                    leftoverInstances.delete(0);

                }
                j++;
                // if j hits the roof, reset it: there may still be folds that
                // have not reached their upper limit, so do not abandon them
                if (j == numberOfFolds)
                    j = 0;
            }
        }
    }
}

From source file:j48.GraftSplit.java

License:Open Source License

/**
 * Deletes the cases in data that belong to the leaf pointed to by
 * the test (i.e. the subset of interest). This is useful so that
 * the instances belonging to that leaf aren't passed down the
 * other branch.
 *
 * @param data the instances to delete from
 */
public void deleteGraftedCases(Instances data) {

    int subOfInterest = subsetOfInterest();
    for (int x = 0; x < data.numInstances(); x++) {
        if (whichSubset(data.instance(x)) == subOfInterest) {
            data.delete(x--);
        }
    }
}
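
A note on the data.delete(x--) idiom above: delete(int) shifts every subsequent instance one position to the left, so decrementing x makes the loop re-examine the index that the next instance just slid into. Iterating from the last index down to zero, as sketched after the next example, avoids the adjustment entirely.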

From source file:machinelearningproject.RFTree.java

@Override
public Tree buildTree(Instances instances) throws Exception {
    Tree tree = new Tree();
    ArrayList<String> availableAttributes = new ArrayList<>();
    int largestInfoGainAttrIdx = -1;
    double largestInfoGainAttrValue = 0.0;

    //choose random fraction
    int numAttr = instances.numAttributes();
    int k = (int) round(sqrt(numAttr));
    ArrayList<Integer> randomIdx = randomFraction(numAttr);

    // draw the k candidate attributes from the random index permutation
    for (int i = 0; i < k; i++) {
        int idx = randomIdx.get(i);
        if (idx != instances.classIndex()) {
            availableAttributes.add(instances.attribute(idx).name());
        }
    }

    if (instances.numInstances() == 0) {
        return null;
    } else if (calculateClassEntropy(instances) == 0.0) {
        // all examples have the same classification
        tree.attributeName = instances.get(0).stringValue(instances.classIndex());
    } else if (availableAttributes.isEmpty()) {
        // mode classification
        tree.attributeName = getModeClass(instances, instances.classIndex());
    } else {
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (idx != instances.classIndex()) {
                double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex());
                if (largestInfoGainAttrValue < attrInfoGain) {
                    largestInfoGainAttrIdx = idx;
                    largestInfoGainAttrValue = attrInfoGain;
                }
            }
        }

        if (largestInfoGainAttrIdx != -1) {
            tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name();
            ArrayList<String> attrValues = new ArrayList<>();
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.get(i);
                String attrValue = instance.stringValue(largestInfoGainAttrIdx);
                if (attrValues.isEmpty() || !attrValues.contains(attrValue)) {
                    attrValues.add(attrValue);
                }
            }

            for (String attrValue : attrValues) {
                Node node = new Node(attrValue);
                Instances copyInstances = new Instances(instances);
                copyInstances.setClassIndex(instances.classIndex());
                int i = 0;
                while (i < copyInstances.numInstances()) {
                    Instance instance = copyInstances.get(i);
                    // reducing examples
                    if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) {
                        copyInstances.delete(i);
                        i--;
                    }
                    i++;
                }
                copyInstances.deleteAttributeAt(largestInfoGainAttrIdx);
                node.subTree = buildTree(copyInstances);
                tree.nodes.add(node);
            }
        }
    }

    return tree;
}
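
Both tree builders above remove instances from copyInstances mid-loop and compensate with i--. A hedged alternative, using only the Weka calls already shown (instance(int), stringValue(int) and delete(int)) with a hypothetical attrIdx standing in for largestInfoGainAttrIdx, iterates from the end so that the left-shift never affects indices still to be visited:

    // Sketch: keep only instances whose attribute at attrIdx equals attrValue.
    for (int i = copyInstances.numInstances() - 1; i >= 0; i--) {
        if (!copyInstances.instance(i).stringValue(attrIdx).equals(attrValue)) {
            copyInstances.delete(i);
        }
    }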

From source file:machinelearningproject.Tree.java

public Tree buildTree(Instances instances) throws Exception {
    Tree tree = new Tree();
    ArrayList<String> availableAttributes = new ArrayList<>();

    int largestInfoGainAttrIdx = -1;
    double largestInfoGainAttrValue = 0.0;

    for (int idx = 0; idx < instances.numAttributes(); idx++) {
        if (idx != instances.classIndex()) {
            availableAttributes.add(instances.attribute(idx).name());
        }
    }

    if (instances.numInstances() == 0) {
        return null;
    } else if (calculateClassEntropy(instances) == 0.0) {
        // all examples have the same classification
        tree.attributeName = instances.get(0).stringValue(instances.classIndex());
    } else if (availableAttributes.isEmpty()) {
        // mode classification
        tree.attributeName = getModeClass(instances, instances.classIndex());
    } else {
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (idx != instances.classIndex()) {
                double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex());
                if (largestInfoGainAttrValue < attrInfoGain) {
                    largestInfoGainAttrIdx = idx;
                    largestInfoGainAttrValue = attrInfoGain;
                }
            }
        }

        if (largestInfoGainAttrIdx != -1) {
            tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name();
            ArrayList<String> attrValues = new ArrayList<>();
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.get(i);
                String attrValue = instance.stringValue(largestInfoGainAttrIdx);
                if (attrValues.isEmpty() || !attrValues.contains(attrValue)) {
                    attrValues.add(attrValue);
                }
            }

            for (String attrValue : attrValues) {
                Node node = new Node(attrValue);
                Instances copyInstances = new Instances(instances);
                copyInstances.setClassIndex(instances.classIndex());
                int i = 0;
                while (i < copyInstances.numInstances()) {
                    Instance instance = copyInstances.get(i);
                    // reducing examples
                    if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) {
                        copyInstances.delete(i);
                        i--;
                    }
                    i++;
                }
                copyInstances.deleteAttributeAt(largestInfoGainAttrIdx);
                node.subTree = buildTree(copyInstances);
                tree.nodes.add(node);
            }
        }
    }

    return tree;
}

From source file:meka.core.SuperLabelUtils.java

License:Open Source License

/**
 * Super Label Transformation - transform dataset D into a dataset with <code>k</code> multi-class target attributes.
 * Use the NSR/PS-style pruning and recomposition, according to partition 'indices', and pruning values 'p' and 'n'.
 * @see PSUtils.PSTransformation
 * @param indices   m by k: m super variables, each relating to k original variables
 * @param    D   either multi-label or multi-target dataset
 * @param    p   pruning value
 * @param    n   subset replacement value
 * @return       a multi-target dataset
 */
public static Instances SLTransformation(Instances D, int indices[][], int p, int n) {

    int L = D.classIndex();
    int K = indices.length;
    ArrayList<String> values[] = new ArrayList[K];
    HashMap<String, Integer> counts[] = new HashMap[K];

    // create D_
    Instances D_ = new Instances(D);

    // clear D_
    // F.removeLabels(D_,L);
    for (int j = 0; j < L; j++) {
        D_.deleteAttributeAt(0);
    }

    // create atts
    for (int j = 0; j < K; j++) {
        int att[] = indices[j];
        //int values[] = new int[2]; //getValues(indices,D,p);
        counts[j] = getCounts(D, att, p);
        Set<String> vals = counts[j].keySet(); //getValues(D,att,p);
        values[j] = new ArrayList(vals);
        D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j);
    }

    // copy over values
    ArrayList<Integer> deleteList = new ArrayList<Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        for (int j = 0; j < K; j++) {
            String y = encodeValue(x, indices[j]);
            try {
                D_.instance(i).setValue(j, y); // y =
            } catch (Exception e) {
                // value not allowed
                deleteList.add(i); // mark it for deletion
                String y_close[] = getTopNSubsets(y, counts[j], n); // get N subsets
                for (int m = 0; m < y_close.length; m++) {
                    //System.out.println("add "+y_close[m]+" "+counts[j]);
                    Instance x_copy = (Instance) D_.instance(i).copy();
                    x_copy.setValue(j, y_close[m]);
                    x_copy.setWeight(1.0 / y_close.length);
                    D_.add(x_copy);
                }
            }
        }
    }
    // clean up
    Collections.sort(deleteList, Collections.reverseOrder());
    //System.out.println("Deleting "+deleteList.size()+" defunct instances.");
    for (int i : deleteList) {
        D_.delete(i);
    }
    // set class
    D_.setClassIndex(K);
    // done!
    return D_;
}
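
The clean-up loop above deletes in descending index order, and the reverseOrder() sort is what makes it correct: each delete(i) shifts every later instance left, so removing from the highest marked index down keeps all still-pending indices valid. One caveat: deleteList.add(i) runs once per failing super label, so the same index can be marked more than once, and a duplicated index deletes an extra instance. A minimal hedged variant that de-duplicates first (assuming java.util.TreeSet is imported):

    // Sketch: same reverse-order clean-up, with duplicate indices removed.
    for (int i : new TreeSet<Integer>(deleteList).descendingSet()) {
        D_.delete(i);
    }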

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Merge Labels - Make a new 'D', with labels made into superlabels, according to partition 'indices', and pruning values 'p' and 'n'.
 * @param    D   assume attributes in D labeled by original index
 * @return       Instances with attributes at j and k moved to position L as (j,k), with classIndex = L-1
 */
public static Instances mergeLabels(Instances D, int indices[][], int p, int n) {

    int L = D.classIndex();
    int K = indices.length;
    ArrayList<String> values[] = new ArrayList[K];
    HashMap<String, Integer> counts[] = new HashMap[K];

    // create D_
    Instances D_ = new Instances(D);

    // clear D_
    for (int j = 0; j < L; j++) {
        D_.deleteAttributeAt(0);
    }

    // create atts
    for (int j = 0; j < K; j++) {
        int att[] = indices[j];
        //int values[] = new int[2]; //getValues(indices,D,p);
        counts[j] = getCounts(D, att, p);
        Set<String> vals = counts[j].keySet(); //getValues(D,att,p);
        values[j] = new ArrayList(vals);
        D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j);
    }

    // copy over values
    ArrayList<Integer> deleteList = new ArrayList<Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        for (int j = 0; j < K; j++) {
            String y = encodeValue(x, indices[j]);
            try {
                D_.instance(i).setValue(j, y); // y = 
            } catch (Exception e) {
                // value not allowed
                deleteList.add(i); // mark it for deletion
                String y_close[] = NSR.getTopNSubsets(y, counts[j], n); // get N subsets
                for (int m = 0; m < y_close.length; m++) {
                    //System.out.println("add "+y_close[m]+" "+counts[j]);
                    Instance x_copy = (Instance) D_.instance(i).copy();
                    x_copy.setValue(j, y_close[m]);
                    x_copy.setWeight(1.0 / y_close.length);
                    D_.add(x_copy);
                }
            }
        }
    }
    // clean up
    Collections.sort(deleteList, Collections.reverseOrder());
    //System.out.println("Deleting "+deleteList.size()+" defunct instances.");
    for (int i : deleteList) {
        D_.delete(i);
    }
    // set class
    D_.setClassIndex(K);
    // done!
    D = null;
    return D_;
}

From source file:moa.tud.ke.patching.InstanceStore.java

public void cleanBatch(int index, int size) {
    Instances inst = getBatch(index);
    System.out.println("Size Batch: " + inst.size());
    while (inst.size() > size) {
        inst.delete(0);
    }
    System.out.println("Size Batch: " + inst.size());
}
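
Each delete(0) call above shifts the whole remaining batch one slot to the left, so trimming a large batch from the front this way is quadratic. If the goal is simply to keep the newest size instances, one copy does the same work in a single pass; a hedged sketch using the Instances(Instances, int, int) copy constructor, assuming inst.numInstances() >= size:

    // Sketch: copy the last 'size' instances instead of repeatedly deleting index 0.
    Instances trimmed = new Instances(inst, inst.numInstances() - size, size);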

From source file:mulan.classifier.meta.HMC.java

License:Open Source License

private void buildRec(HMCNode node, Instances data) throws InvalidDataFormatException, Exception {
    String metaLabel = node.getName();

    //debug("Preparing node data");
    Set<String> childrenLabels = new HashSet<String>();
    Set<String> currentlyAvailableLabels = new HashSet<String>();
    if (metaLabel.equals("root")) {
        for (LabelNode child : originalMetaData.getRootLabels()) {
            childrenLabels.add(child.getName());
        }
        currentlyAvailableLabels = originalMetaData.getLabelNames();
    } else {
        LabelNode labelNode = originalMetaData.getLabelNode(metaLabel);
        for (LabelNode child : labelNode.getChildren()) {
            childrenLabels.add(child.getName());
        }
        currentlyAvailableLabels = labelNode.getDescendantLabels();
    }

    // delete non-children labels
    Set<String> labelsToDelete = new HashSet(currentlyAvailableLabels);
    labelsToDelete.removeAll(childrenLabels);
    //=====================================================
    //  System.out.println("Children: " + Arrays.toString(childrenLabels.toArray()));
    //  System.out.println("Labels to delete:" + Arrays.toString(labelsToDelete.toArray()));
    //======================================================
    int[] indicesToDelete = new int[labelsToDelete.size()];
    int counter1 = 0;
    for (String label : labelsToDelete) {
        indicesToDelete[counter1] = data.attribute(label).index();
        counter1++;
    }

    Remove filter1 = new Remove();
    filter1.setAttributeIndicesArray(indicesToDelete);
    filter1.setInputFormat(data);
    Instances nodeInstances = Filter.useFilter(data, filter1);
    //        System.out.println()

    // create meta data
    LabelsMetaDataImpl nodeMetaData = new LabelsMetaDataImpl();
    for (String label : childrenLabels) {
        nodeMetaData.addRootNode(new LabelNodeImpl(label));
    }

    // create multi-label instance
    MultiLabelInstances nodeData = new MultiLabelInstances(nodeInstances, nodeMetaData);
    //==================================================
    // System.out.println("Building model");
    //============================================
    node.build(nodeData);
    //============================================
    System.out.println("spark #instances:" + nodeInstances.numInstances());
    //============================================
    TotalUsedTrainInsts += nodeInstances.numInstances();
    NoNodes++;
    //============================================
    // System.out.println("spark:#nodes: "+ root);
    //============================================
    for (String childLabel : childrenLabels) {
        LabelNode childNode = originalMetaData.getLabelNode(childLabel);
        if (!childNode.hasChildren()) {
            continue;
        }
        //=================================

        //    System.out.println("Preparing child data");
        //============================================
        // remove instances where child is 0
        int childMetaLabelIndex = data.attribute(childLabel).index();
        Instances childData = new Instances(data);
        for (int i = 0; i < childData.numInstances(); i++) {
            if (childData.instance(i).stringValue(childMetaLabelIndex).equals("0")) {
                childData.delete(i);
                // While deleting an instance, i must be reduced too
                i--;
            }
        }

        // delete non-descendant labels
        Set<String> descendantLabels = childNode.getDescendantLabels();
        Set<String> labelsToDelete2 = new HashSet(currentlyAvailableLabels);
        labelsToDelete2.removeAll(descendantLabels);
        //System.out.println("Labels to delete:" + Arrays.toString(labelsToDelete2.toArray()));
        int[] indicesToDelete2 = new int[labelsToDelete2.size()];
        int counter2 = 0;
        for (String label : labelsToDelete2) {
            indicesToDelete2[counter2] = childData.attribute(label).index();
            counter2++;
        }

        Remove filter2 = new Remove();
        filter2.setAttributeIndicesArray(indicesToDelete2);
        filter2.setInputFormat(childData);
        childData = Filter.useFilter(childData, filter2);

        MultiLabelLearner mll = baseLearner.makeCopy();
        HMCNode child = new HMCNode(childLabel, mll);
        node.addChild(child);

        buildRec(child, childData);
    }

}

From source file:mulan.classifier.meta.HMC.java

License:Open Source License

/**
 * Deletes the unnecessary instances: the instances that have value 0 on
 * the given attribute.
 *
 * @param trainSet the trainSet on which the deletion will be applied
 * @param attrIndex the index of the attribute that the deletion is based on
 */
protected void deleteInstances(Instances trainSet, int attrIndex) {
    for (int i = 0; i < trainSet.numInstances(); i++) {
        if (trainSet.instance(i).stringValue(attrIndex).equals("0")) {
            trainSet.delete(i);
            // While deleting an instance from the trainSet, i must be reduced too
            i--;
        }
    }
}

From source file:mulan.data.IterativeStratification.java

License:Open Source License

private Instances[] foldsCreation(Instances workingSet, Random random, double[] splitRatio, int numLabels,
        int[] labelIndices, int totalNumberOfInstances) {
    int numFolds = splitRatio.length;
    // The instances on the final folds
    Instances[] instancesOnSplits = new Instances[numFolds];
    // Initialize the folds
    for (int fold = 0; fold < numFolds; fold++) {
        instancesOnSplits[fold] = new Instances(workingSet, 0);
    }

    // *************************************
    // First Part of the Algorithm LINES 1-9
    // *************************************

    // LINE 7 in the Algorithm
    // The vector with the frequencies in the data set (frequency: the number of 
    // examples per label)
    int[] frequenciesOnDataset = new int[numLabels];
    // Calculating the number of examples per label in the initial data set
    frequenciesOnDataset = calculatingTheFrequencies(workingSet, numLabels, labelIndices);

    // LINE 2-3 and 8-9 in the Algorithm
    // I define the desired splits by calculating them from the splitRatio
    // array; the last column holds the desired number of instances per fold
    double[][] desiredSplit = new double[numFolds][numLabels + 1];
    // This starts as the desired split; I reduce the frequency values (first
    // numLabels columns) and the instance count (last column) every time I
    // put an instance into a split.
    desiredSplit = calculatingTheDesiredSplits(frequenciesOnDataset, splitRatio, numLabels,
            totalNumberOfInstances);

    // *************************************
    // Second Part of the Algorithm LINES 10-34
    // *************************************

    // LINE 11-14 in the Algorithm
    // A vector to keep the rarest label: the index at [0] and the value at [1],
    // where the value is the number of examples of the rarest label.
    int[] smallestFreqLabel = new int[2];
    // Function which returns these characteristics of the rarest label
    smallestFreqLabel = takingTheSmallestIndexAndNumberInVector(frequenciesOnDataset, totalNumberOfInstances);

    // This variable gives me the fold in which I will insert an instance
    int splitToBeInserted;
    // The instances that are filtered for a particular label (those whose
    // value is 1 for that label)
    Instances filteredInstancesForLabel;
    Instance filteredInstance;

    boolean[] trueLabels = new boolean[numLabels];

    for (int lab = 0; lab < numLabels; lab++) {

        // By calling the function I take the instances that are annotated
        // with the label with index smallestFreqLabel[0]
        // and I also take the workingSet with the remaining instances.
        // I use a temporary variable temp to make the code more efficient
        Instances[] temp = new Instances[2];
        temp = takeTheInstancesOfTheLabel(workingSet, numLabels, labelIndices, smallestFreqLabel);

        // The instances that I will split at this point
        // LINE 13 in the Algorithm
        filteredInstancesForLabel = temp[0];
        // The remaining instances
        workingSet = temp[1];

        // This variable is used to tell me the suitable folds in which an instance can be inserted.
        // The first element contains the total number of suitable folds and the rest are the indices of these folds
        int[] possibleSplits;

        // I distribute the filtered instances among the splits.
        // The first priority is the splits with the highest desired frequency.
        // The second priority is the split with the highest desired number of instances.
        // If two splits are equivalent for the above two rules I decide randomly
        // in which fold the instance will be inserted
        for (int instancesOfTheLab = 0; instancesOfTheLab < filteredInstancesForLabel
                .numInstances(); instancesOfTheLab++) {
            filteredInstance = filteredInstancesForLabel.instance(instancesOfTheLab);
            trueLabels = getTrueLabels(filteredInstance, numLabels, labelIndices);

            // LINES 20-27 in the Algorithm
            // I call that function to return the possible folds with the above priorities.
            // possibleSplits[0] contains the total number of possible folds and the rest elements
            // are the indexes of the possible folds. 
            possibleSplits = findThePossibleSpit(desiredSplit, smallestFreqLabel[0], numFolds);
            // I decide in which fold to put the instance. If there is more than
            // one possible fold, I break the tie randomly
            if (possibleSplits[0] != 1) {
                splitToBeInserted = possibleSplits[random.nextInt(possibleSplits[0]) + 1];
            } else {
                splitToBeInserted = possibleSplits[1];
            }

            // LINE 28 in the Algorithm
            // Enter the instance to the proper fold
            instancesOnSplits[splitToBeInserted].add(filteredInstance);

            // LINE 30-32 in the Algorithm
            // Update the statistics of this fold
            desiredSplit[splitToBeInserted] = updateDesiredSplitStatistics(desiredSplit[splitToBeInserted],
                    trueLabels);
        }

        // I update the values for the next iteration
        frequenciesOnDataset = calculatingTheFrequencies(workingSet, numLabels, labelIndices);
        smallestFreqLabel = takingTheSmallestIndexAndNumberInVector(frequenciesOnDataset,
                totalNumberOfInstances);

    }

    // Special case: some examples are not annotated with any label (e.g. the mediamill data set).
    // These examples are distributed so as to balance the desired number of examples at each fold
    Instance noAnnotatedInstances;
    int[] possibleSplitsNoAnnotated = new int[numFolds];
    while (workingSet.numInstances() != 0) {

        possibleSplitsNoAnnotated = returnPossibleSplitsForNotAnnotated(desiredSplit);
        noAnnotatedInstances = workingSet.instance(0);
        if (possibleSplitsNoAnnotated[0] != 1) {
            splitToBeInserted = possibleSplitsNoAnnotated[random.nextInt(possibleSplitsNoAnnotated[0]) + 1];
        } else {
            splitToBeInserted = possibleSplitsNoAnnotated[1];
        }
        // Entering the instance to the proper fold
        instancesOnSplits[splitToBeInserted].add(noAnnotatedInstances);
        // Updating the instances
        desiredSplit[splitToBeInserted][desiredSplit[splitToBeInserted].length
                - 1] = desiredSplit[splitToBeInserted][desiredSplit[splitToBeInserted].length - 1] - 1;

        // Deleting the instance from the working set
        workingSet.delete(0);

    }

    return instancesOnSplits;
}