List of usage examples for weka.core.Instances.delete()
public void delete()
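All of the examples below follow the same basic idiom: copy an existing dataset so that its header (attribute definitions and class index) is preserved, then call delete() to remove every instance, leaving an empty dataset that new instances can be added to. The following minimal sketch illustrates that idiom on its own; the file name "data.arff" and the class-value filter are placeholders for this illustration, not taken from the examples below.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EmptyCopyExample {
    public static void main(String[] args) throws Exception {
        // Load any dataset; "data.arff" is a placeholder path.
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Copy the dataset, then delete() all instances: the copy keeps the
        // attribute definitions (header) but contains no rows.
        Instances emptyCopy = new Instances(data);
        emptyCopy.delete();

        // Selectively add instances back, e.g. only those with class value 0.0.
        for (int i = 0; i < data.numInstances(); i++) {
            if (data.instance(i).classValue() == 0.0) {
                emptyCopy.add(data.instance(i));
            }
        }

        System.out.println("Original: " + data.numInstances()
                + " instances, filtered copy: " + emptyCopy.numInstances());
    }
}

Note that the two-argument constructor new Instances(data, 0) produces the same header-only copy directly, without first copying and then deleting the instances.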
From source file:de.ugoe.cs.cpdp.dataprocessing.SimulationFilter.java
License:Apache License
@Override
public void apply(Instances testdata, Instances traindata) {
    Instances newDataSet = new Instances(traindata);
    traindata.delete();

    HashMap<Double, Instance> artifactNames = new HashMap<Double, Instance>();

    // This is to add all data where the first occurrence of the file has a bug
    ArrayList<Double> firstOccurenceArtifactNames = new ArrayList<Double>();

    // Sort dataset (StateID is connected to the date of commit: a lower StateID
    // means an earlier commit than a higher StateID)
    Attribute wekaAttribute = newDataSet.attribute("Artifact.Target.StateID");
    newDataSet.sort(wekaAttribute);

    /*
     * Logical summary: If there is an instance that does not have a bug, put it into the
     * hash map (only unique values in there).
     *
     * If there is an instance that has a bug, look up whether it is already in the hash map
     * (this means it did not have a bug before): if so, add it to the new dataset and remove
     * it from the hash map, so that new changes from "nonBug" -> "bug" for this file can be
     * found.
     *
     * If the instance has a bug and is not in the hash map (this means the file has a bug at
     * its first occurrence, or this file only has bugs and no instance without a bug), then
     * (if it is not in the array list above) add it to the new dataset. This way it is
     * possible to get the first occurrence of a file which has a bug.
     */
    for (int i = 0; i < newDataSet.numInstances(); i++) {
        Instance wekaInstance = newDataSet.instance(i);

        double newBugLabel = wekaInstance.classValue();
        Attribute wekaArtifactName = newDataSet.attribute("Artifact.Name");
        Double artifactName = wekaInstance.value(wekaArtifactName);

        if (newBugLabel == 0.0 && artifactNames.keySet().contains(artifactName)) {
            artifactNames.put(artifactName, wekaInstance);
        } else if (newBugLabel == 0.0 && !artifactNames.keySet().contains(artifactName)) {
            artifactNames.put(artifactName, wekaInstance);
        } else if (newBugLabel == 1.0 && artifactNames.keySet().contains(artifactName)) {
            traindata.add(wekaInstance);
            artifactNames.remove(artifactName);
        } else if (newBugLabel == 1.0 && !artifactNames.keySet().contains(artifactName)) {
            if (!firstOccurenceArtifactNames.contains(artifactName)) {
                traindata.add(wekaInstance);
                firstOccurenceArtifactNames.add(artifactName);
            }
        }
    }

    // If we have a file that never had a bug (i.e. it is NOT in the newly created dataset,
    // but it is in the hash map from above), add it to the new dataset
    double[] artifactNamesinNewDataSet = traindata.attributeToDoubleArray(0);
    HashMap<Double, Instance> artifactNamesCopy = new HashMap<Double, Instance>(artifactNames);

    for (Double artifactName : artifactNames.keySet()) {
        for (int i = 0; i < artifactNamesinNewDataSet.length; i++) {
            if (artifactNamesinNewDataSet[i] == artifactName) {
                artifactNamesCopy.remove(artifactName);
            }
        }
    }

    for (Double artifact : artifactNamesCopy.keySet()) {
        traindata.add(artifactNamesCopy.get(artifact));
    }
}
From source file:de.ugoe.cs.cpdp.dataselection.CLIFF.java
License:Apache License
/**
 * <p>
 * Applies the CLIFF relevancy filter to the data.
 * </p>
 *
 * @param data
 *            the data
 * @return CLIFF-filtered data
 */
protected Instances applyCLIFF(Instances data) {
    final double[][] powerAttributes = new double[data.size()][data.numAttributes()];
    final double[] powerEntity = new double[data.size()];

    final int[] counts = data.attributeStats(data.classIndex()).nominalCounts;
    final double probDefect = data.numInstances() / (double) counts[1];

    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute()) {
            final double[] ranges = getRanges(data, j);
            final double[] probDefectRange = getRangeProbabilities(data, j, ranges);

            for (int i = 0; i < data.numInstances(); i++) {
                final double value = data.instance(i).value(j);
                final int range = determineRange(ranges, value);
                double probClass, probNotClass, probRangeClass, probRangeNotClass;
                if (data.instance(i).classValue() == 1) {
                    probClass = probDefect;
                    probNotClass = 1.0 - probDefect;
                    probRangeClass = probDefectRange[range];
                    probRangeNotClass = 1.0 - probDefectRange[range];
                } else {
                    probClass = 1.0 - probDefect;
                    probNotClass = probDefect;
                    probRangeClass = 1.0 - probDefectRange[range];
                    probRangeNotClass = probDefectRange[range];
                }
                powerAttributes[i][j] = Math.pow(probRangeClass, 2.0)
                        / (probRangeClass * probClass + probRangeNotClass * probNotClass);
            }
        }
    }

    for (int i = 0; i < data.numInstances(); i++) {
        powerEntity[i] = 1.0;
        for (int j = 0; j < data.numAttributes(); j++) {
            powerEntity[i] *= powerAttributes[i][j];
        }
    }

    double[] sortedPower = powerEntity.clone();
    Arrays.sort(sortedPower);
    double cutOff = sortedPower[(int) (data.numInstances() * (1 - percentage))];

    final Instances selected = new Instances(data);
    selected.delete();
    for (int i = 0; i < data.numInstances(); i++) {
        if (powerEntity[i] >= cutOff) {
            selected.add(data.instance(i));
        }
    }
    return selected;
}
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/**
 * Called by generateResampledSubdataset.
 *
 * @param originalDataset
 * @param subdatasetDimensions
 * @return
 */
private Instances generateResampledSubdataset(Instances originalDataset,
        SubdatasetDimensions subdatasetDimensions) {

    // creates an empty dataset
    Instances resampledSubdataset = new Instances(originalDataset);
    resampledSubdataset.delete();

    // randomize dataset instances order
    originalDataset.randomize(RandomizationManager.randomGenerator);

    // calc number of positives to insert
    int positivesToInsert = subdatasetDimensions.getP();
    if (verbose)
        System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] positivesToInsert = "
                + positivesToInsert);

    // calc number of negatives to insert
    int negativesToInsert = subdatasetDimensions.getN();

    // iterates over the original dataset instances
    for (int i = 0; i < originalDataset.numInstances(); i++) {
        // if instance is positive and more are needed in the new dataset, inserts into new dataset
        if ((positivesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex())
                .equals(Settings.buggyLabel))) {
            resampledSubdataset.add(originalDataset.instance(i));
            positivesToInsert--;
        }

        // if instance is negative and more are needed in the new dataset, inserts into new dataset
        if ((negativesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex())
                .equals(Settings.nonbuggyLabel))) {
            resampledSubdataset.add(originalDataset.instance(i));
            negativesToInsert--;
        }
    }

    if (verbose)
        System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] resampling finished: "
                + this.printDatasetInfo(resampledSubdataset));

    return resampledSubdataset;
}
From source file:ergasia2pkg.ML_RUS.java
/**
 * Method to perform undersampling on the initial dataset. The method
 * removes instances from the dataset according to the algorithm proposed in
 * the paper, utilising the Mean Imbalance Ratio measure.
 *
 * @param mlData MultiLabelInstances object, holds a set of multilabel instances
 * @return MultiLabelInstances object containing the initial instances minus
 *         the instances removed by undersampling
 * @throws Exception
 */
@Override
public MultiLabelInstances transformInstances(MultiLabelInstances mlData) throws Exception {

    // Initialise the label counters
    labelCount(mlData);

    // Clone the dataset into a new object
    MultiLabelInstances mlDataClone = mlData.clone();

    // Clone a new set to contain all the instances that will be returned
    Instances mlDataReturned = mlData.clone().getDataSet();
    mlDataReturned.delete();

    // Calculate the number of samples to remove
    int samplesToDelete = (int) (mlData.getNumInstances() / (100 * P));
    int remainingLabels;

    // Declare two lists of lists, a minority bag and a majority bag. The minBags
    // will contain lists (bags) of instances having labels with an imbalance ratio
    // higher than the mean imbalance ratio. These will be set aside and not tampered
    // with in any way. The majBags will contain lists of instances having labels with
    // an imbalance ratio lower than or equal to the mean imbalance ratio. These
    // instances will be the candidates for deletion.
    List<List<Instance>> minBags = new ArrayList<>();
    List<List<Instance>> majBags = new ArrayList<>();

    // Get an array with the indices of all the labels
    int L[] = mlDataClone.getLabelIndices();

    // Calculate the dataset's mean imbalance ratio
    double meanIR = meanImbalanceRatio(mlDataClone);

    String labelName;
    int i = 0, m = 0, x, labelCounter = 0;

    // Declare a boolean array which follows the labelset L and determines
    // whether or not a label's instances should be considered for undersampling.
    // Initialise all its values to true.
    boolean included[] = new boolean[L.length];
    for (int k = 0; k < L.length; k++) {
        included[k] = true;
    }

    Random rand = new Random();

    // Perform the following operation for each label.
    // Note that labels are represented by their integer index, which is then
    // transformed to its string name. This was done to avoid problems and
    // exceptions thrown by methods required below.
    for (int label : L) {
        // Get the label name from the current instance, based on the label index
        labelName = mlDataClone.getDataSet().attribute(label).name();

        if (imbalanceRatioPerLabel(mlDataClone, labelName) > meanIR) {
            // If the imbalance ratio of the label is greater than the mean
            // imbalance ratio of the dataset, add a minBag corresponding to
            // the specific label.
            minBags.add(new ArrayList<Instance>());

            // Add all instances containing this label to the minBag we just created
            for (int l = 0; l < mlDataClone.getNumInstances(); l++) {
                if (mlDataClone.getDataSet().get(l).value(label) == 1.0) {
                    minBags.get(i).add(mlDataClone.getDataSet().get(l));
                    // Remove the instance from the cloned dataset
                    mlDataClone.getDataSet().delete(l);
                }
            }

            // Set the included flag to false, so that the label is not added to the majBags
            included[labelCounter] = false;
            i++;
        }
        labelCounter++;
    }

    // For every label again
    for (int label : L) {
        // Add a new majBag (one for each label)
        majBags.add(new ArrayList<Instance>());

        // Add all the instances having this label to the majBag. Note that
        // this operation takes place on the cloned dataset, which now contains
        // only the instances not having minority labels
        for (int l = 0; l < mlDataClone.getNumInstances(); l++) {
            if (mlDataClone.getDataSet().get(l).value(label) == 1.0) {
                majBags.get(m).add(mlDataClone.getDataSet().get(l));
            }
        }
        m++;
    }

    remainingLabels = L.length - minBags.size();

    // While we haven't deleted all the samples yet and we still have labels to delete
    while (samplesToDelete > 0 && remainingLabels > 0) {
        // For each of the INITIAL labels (not only the ones in the cloned dataset)
        for (int j = 0; j < mlData.getNumLabels(); j++) {
            if (included[j]) {
                // If it is to be included (meaning it is a majority label), check
                // whether this bag contains instances. If it doesn't, decrease the
                // numbers and go to the next iteration.
                if (majBags.get(j).size() == 0) {
                    included[j] = false;
                    remainingLabels--;
                    continue;
                }

                // Get a random instance from the bag
                x = rand.nextInt(majBags.get(j).size());

                // Based on the instance and the index, get its label
                labelName = majBags.get(j).get(x).attribute(L[j]).name();

                // Remove the instance from the bag
                majBags.get(j).remove(x);

                // If the imbalance ratio of the label has increased beyond the
                // acceptable limit of the mean imbalance ratio, remove this
                // majBag from future candidates
                if (imbalanceRatioPerLabel(mlDataClone, labelName) >= meanIR) {
                    included[j] = false;
                    remainingLabels--;
                }
                samplesToDelete--;
            }
        }
    }

    // Add the contents of the minBags and the majBags to an empty dataset and return it
    for (List<Instance> list : minBags) {
        for (Instance inst : list) {
            mlDataReturned.add(inst);
        }
    }
    for (List<Instance> list : majBags) {
        for (Instance inst : list) {
            mlDataReturned.add(inst);
        }
    }

    return new MultiLabelInstances(mlDataReturned, mlData.getLabelsMetaData());
}
From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java
License:Open Source License
/**
 * @param folds
 * @param minAgents
 * @param maxAgents
 * @param originalDatasetPath
 * @param outputDir
 * @param scenario
 * @param logger
 */
public void splitDataset(int folds, int minAgents, int maxAgents, String originalDatasetPath,
        String outputDir, String scenario, Logger logger) {

    int ratioint = (int) ((1 / (double) folds) * 100);
    double roundedratio = ((double) ratioint) / 100;

    // Look for essentials
    List<String[]> essentials = this.getEssentials(originalDatasetPath, logger);

    for (int fold = 0; fold < folds; fold++) {
        String outputDirWithRatio = outputDir + "/" + roundedratio + "testRatio/iteration-" + fold;
        File dir = new File(outputDirWithRatio);
        if (!dir.exists() || !dir.isDirectory()) {
            dir.mkdirs();
        }

        logger.finer("--> splitDataset()");
        logger.fine("Creating experiment.info...");

        try {
            Instances originalData = this.getDataFromCSV(originalDatasetPath);

            originalData.randomize(new Random());
            originalData.stratify(folds);

            // TestDataSet
            Instances testData = originalData.testCV(folds, fold);
            CSVSaver saver = new CSVSaver();
            ArffSaver arffsaver = new ArffSaver();
            File file = new File(outputDirWithRatio + File.separator + "test-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(testData);
                saver.setFile(file);
                saver.writeBatch();
            }

            file = new File(outputDirWithRatio + File.separator + "test-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(testData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
            }

            // BayesCentralDataset
            Instances trainData = originalData.trainCV(folds, fold);
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(trainData);
                saver.setFile(file);
                saver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file,
                        new File(outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.csv"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(trainData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file,
                        new File(outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.arff"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            // Agent datasets
            CsvReader csvreader = new CsvReader(new FileReader(new File(originalDatasetPath)));
            csvreader.readHeaders();
            String[] headers = csvreader.getHeaders();
            csvreader.close();

            for (int agents = minAgents; agents <= maxAgents; agents++) {
                this.createExperimentInfoFile(folds, agents, originalDatasetPath, outputDirWithRatio,
                        scenario, logger);
                HashMap<String, CsvWriter> writers = new HashMap<String, CsvWriter>();
                String agentsDatasetsDir = outputDirWithRatio + File.separator + agents + "agents";
                HashMap<String, CsvWriter> arffWriters = new HashMap<String, CsvWriter>();
                File f = new File(agentsDatasetsDir);
                if (!f.isDirectory()) {
                    f.mkdirs();
                }

                Instances copy = new Instances(trainData);
                copy.delete();

                for (int i = 0; i < agents; i++) {
                    String fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.csv";
                    file = new File(fileName);
                    if (!file.exists()) {
                        CsvWriter writer = new CsvWriter(new FileWriter(fileName), ',');
                        writer.writeRecord(headers);
                        writers.put("AGENT" + i, writer);
                    }
                    fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.arff";
                    file = new File(fileName);
                    if (!file.exists()) {
                        arffsaver.resetOptions();
                        arffsaver.setInstances(copy);
                        arffsaver.setFile(new File(fileName));
                        arffsaver.writeBatch();
                        CsvWriter arffwriter = new CsvWriter(new FileWriter(fileName, true), ',');
                        arffWriters.put("AGENT" + i, arffwriter);
                    }
                    logger.fine("AGENT" + i + " dataset created in csv and arff formats.");
                }

                // Append essentials to all
                for (String[] essential : essentials) {
                    for (CsvWriter wr : writers.values()) {
                        wr.writeRecord(essential);
                    }
                    for (CsvWriter arffwr : arffWriters.values()) {
                        arffwr.writeRecord(essential);
                    }
                }

                int agentCounter = 0;
                for (int j = 0; j < trainData.numInstances(); j++) {
                    Instance instance = trainData.instance(j);
                    CsvWriter writer = writers.get("AGENT" + agentCounter);
                    CsvWriter arffwriter = arffWriters.get("AGENT" + agentCounter);
                    String[] row = new String[instance.numAttributes()];
                    for (int a = 0; a < instance.numAttributes(); a++) {
                        row[a] = instance.stringValue(a);
                    }
                    if (writer != null) {
                        writer.writeRecord(row);
                    }
                    if (arffwriter != null) {
                        arffwriter.writeRecord(row);
                    }
                    agentCounter++;
                    if (agentCounter == agents) {
                        agentCounter = 0;
                    }
                }

                for (CsvWriter wr : writers.values()) {
                    wr.close();
                }
                for (CsvWriter arffwr : arffWriters.values()) {
                    arffwr.close();
                }
            }
        } catch (Exception e) {
            logger.severe("Exception while splitting dataset. ->");
            logger.severe(e.getMessage());
            System.exit(1);
        }
        logger.finest("Dataset for fold " + fold + " created.");
    }
    logger.finer("<-- splitDataset()");
}
From source file:gov.va.chir.tagline.TagLineTrainer.java
License:Open Source License
public TagLineModel getTagLineModel() {
    final Instances header = new Instances(instances);
    header.delete();

    tagLineModel.setHeader(header);
    tagLineModel.setFeatures(extractor.getFeatures());

    return tagLineModel;
}
From source file:linqs.gaia.model.oc.ncc.WekaClassifier.java
License:Open Source License
@Override
public void learn(Iterable<? extends Decorable> trainitems, String targetschemaid,
        String targetfeatureid, List<String> featureids) {
    try {
        this.targetschemaid = targetschemaid;
        this.targetfeatureid = targetfeatureid;
        this.featureids = new LinkedList<String>(featureids);

        LinkedHashSet<String> uniquefids = new LinkedHashSet<String>(featureids);
        if (uniquefids.size() != featureids.size()) {
            Log.WARN("Duplicate feature ids found in set of features: " + featureids);
            this.featureids = new ArrayList<String>(uniquefids);
        }

        if (this.featureids.contains(this.targetfeatureid)) {
            throw new InvalidStateException(
                    "Cannot include target feature as a dependency feature: " + this.targetfeatureid);
        }

        Log.DEBUG("Features Used: " + ListUtils.list2string(featureids, ","));

        // Added for weka. Will only be used for training.
        // Target will not be used as a feature itself.
        this.featureids.add(this.targetfeatureid);

        String wcclass = WekaClassifier.DEFAULT_WEKA_CLASSIFIER;
        if (this.hasParameter("wekaclassifier")) {
            wcclass = this.getStringParameter("wekaclassifier");
        }

        String wekaparams = WekaClassifier.NO_PARAMS;
        if (this.hasParameter("wekaparams")) {
            wekaparams = this.getStringParameter("wekaparams");
        }

        boolean printwekamodel = this.hasParameter("printwekamodel", "yes");

        // Support generation of class based cost matrix
        if (this.hasParameter("costbyclass", "yes")) {
            fclasscount = new KeyedCount<String>();
        }

        // Weka instances
        int numinstances = IteratorUtils.numIterable(trainitems);
        Instances traininstances = this.gaia2weka(trainitems.iterator(), numinstances, false);

        // Handle class based cost matrix
        if (fclasscount != null) {
            if (wekaparams.equals(WekaClassifier.NO_PARAMS)) {
                wekaparams = "";
            } else {
                wekaparams += ",";
            }
            wekaparams += "-cost-matrix," + this.getCostMatrix();
        }

        // Set GAIA parameters and initialize classifier
        String params[] = null;
        if (!wekaparams.equals(WekaClassifier.NO_PARAMS)) {
            Log.DEBUG("Using wekaparams: " + wekaparams);
            params = wekaparams.split(",");
        }

        wekaclassifier = Classifier.forName(wcclass, params);

        // Train classifier
        if (this.hasParameter("wekatrainfile")) {
            String savefile = this.getStringParameter("wekatrainfile");
            this.saveWekaInstances(savefile, traininstances);
        }

        Log.DEBUG("Weka building classifier");
        SimpleTimer st = new SimpleTimer();
        st.start();
        wekaclassifier.buildClassifier(traininstances);
        Log.DEBUG("Weka done building classifier: (" + st.timeLapse(true) + ")");

        // Print Weka Model, if requested
        if (printwekamodel) {
            Log.INFO("Learned Weka Model:\n" + this.wekaclassifier);
        }

        // Print attributes
        if (Log.SHOWDEBUG) {
            String features = null;
            for (int f = 0; f < traininstances.numAttributes(); f++) {
                if (features == null) {
                    features = "";
                } else {
                    features += ",";
                }
                features += traininstances.attribute(f).name();
            }
            String options[] = wekaclassifier.getOptions();
            Log.DEBUG("Weka Options: " + ArrayUtils.array2String(options, ","));
        }

        // Clear instances once training is complete
        traininstances.delete();
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:meka.classifiers.multilabel.Maniac.java
License:Open Source License
@Override
public Instance transformInstance(Instance x) throws Exception {
    Instances tmpInst = new Instances(x.dataset());
    tmpInst.delete();
    tmpInst.add(x);

    Instances features = this.extractPart(tmpInst, false);

    Instances pseudoLabels = new Instances(this.compressedTemplateInst);
    Instance tmpin = pseudoLabels.instance(0);
    pseudoLabels.delete();

    pseudoLabels.add(tmpin);
    for (int i = 0; i < pseudoLabels.classIndex(); i++) {
        pseudoLabels.instance(0).setMissing(i);
    }

    Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
    newDataSet.setClassIndex(pseudoLabels.numAttributes());

    return newDataSet.instance(0);
}
From source file:meka.classifiers.multilabel.MLCBMaD.java
License:Open Source License
@Override
public Instance transformInstance(Instance x) throws Exception {
    Instances tmpInst = new Instances(x.dataset());
    tmpInst.delete();
    tmpInst.add(x);

    Instances features = this.extractPart(tmpInst, false);

    Instances pseudoLabels = new Instances(this.compressedMatrix);
    Instance tmpin = pseudoLabels.instance(0);
    pseudoLabels.delete();

    pseudoLabels.add(tmpin);
    for (int i = 0; i < pseudoLabels.classIndex(); i++) {
        pseudoLabels.instance(0).setMissing(i);
    }

    Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
    newDataSet.setClassIndex(this.size);

    return newDataSet.instance(0);
}
From source file:meka.classifiers.multilabel.PLST.java
License:Open Source License
/**
 * Transforms the instance in the prediction process before it is given to the internal
 * multi-label or multi-target classifier. The instance is passed with the original set of
 * labels; these must be replaced with the transformed labels (attributes) so that the
 * internal classifier can predict them.
 *
 * @param x The instance to transform. Consists of features and labels.
 * @return The transformed instance. Consists of features and transformed labels.
 */
@Override
public Instance transformInstance(Instance x) throws Exception {
    Instances tmpInst = new Instances(x.dataset());
    tmpInst.delete();
    tmpInst.add(x);

    Instances features = this.extractPart(tmpInst, false);

    Instances labels = new Instances(this.m_PatternInstances);
    labels.add(new DenseInstance(labels.numAttributes()));

    Instances result = Instances.mergeInstances(labels, features);
    result.setClassIndex(labels.numAttributes());

    return result.instance(0);
}