Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

On this page you can find example usages of weka.core.Instance.setDataset.

Prototype

public void setDataset(Instances instances);

Source Link

Document

Sets the reference to the dataset.

Usage

From source file:mulan.classifier.transformation.TwoStageClassifierChainArchitecture.java

License:Open Source License

/**
 * Builds the training set for the second stage: an empty copy of the training
 * data's header gets one extra attribute per label inserted at the front, and is
 * then filled with every training instance after it has been extended with the
 * confidences predicted by the virtual label models.
 *
 * @param trainingSet the original multi-label training data
 * @return the extended multi-label training data
 * @throws Exception if a prediction or an instance transformation fails
 */
private MultiLabelInstances GenerateChain(MultiLabelInstances trainingSet) throws Exception {
    Instances source = trainingSet.getDataSet();
    int total = source.numInstances();

    MultiLabelInstances chained = new MultiLabelInstances(new Instances(source, total),
            trainingSet.getLabelsMetaData());
    Instances target = chained.getDataSet();

    // Insert from the last label down to the first so that, after all the
    // insertions at position 0, the attribute for label 0 comes first.
    // NOTE(review): "0vs" + i + 1 is string concatenation (label 3 -> "0vs31"),
    // not "0vs" + (i + 1); kept as is because the names are part of the output.
    int labelPos = trainingSet.getNumLabels() - 1;
    while (labelPos >= 0) {
        target.insertAttributeAt(new Attribute("0vs" + labelPos + 1), 0);
        labelPos--;
    }

    for (int row = 0; row < total; row++) {
        Instance current = source.instance(row);
        MultiLabelOutput output = virtualLabelModels.makePrediction(current);

        Instance transformed = (current instanceof SparseInstance)
                ? modifySparseInstance(current, output.getConfidences())
                : modifyDenseInstance(current, output.getConfidences());

        target.add(transformed);
        transformed.setDataset(target);
    }

    return chained;
}

From source file:mulan.classifier.transformation.TwoStagePrunedClassifierChainArchitecture.java

License:Open Source License

/**
 * This method does a prediction for an instance with the values of label
 * missing according to Two Stage Voting Method (TSVM), which is described
 * in : Madjarov, Gj., Gjorgjevikj, D. and Dzeroski, S. Efficient two stage
 * voting architecture for pairwise multi-label classification. In AI 2010:
 * Advances in Artificial Intelligence (J. Li, ed.), vol. 6464 of Lecture
 * Notes in Computer Science, pp. 164173, 2011
 *
 * @param instance/* w  ww  . java 2s  .  c om*/
 * @return prediction
 * @throws java.lang.Exception Potential exception thrown. To be handled in an upper level.
 */
private MultiLabelOutput makePredictionTSCCA(Instance instance) throws Exception {
    boolean[] bipartition = new boolean[numLabels];
    double[] confidences = new double[numLabels];
    int[] voteLabel = new int[numLabels + 1];
    int[] noVoteLabel = new int[numLabels + 1];
    int[] voteFromVirtualModels = new int[numLabels];
    double[] confidenceFromVirtualModels = new double[numLabels];

    //System.out.println("Instance:" + instance.toString());

    //initialize the array voteLabel
    Arrays.fill(voteLabel, 0);
    Arrays.fill(noVoteLabel, 0);
    Arrays.fill(voteFromVirtualModels, 0);
    Arrays.fill(confidenceFromVirtualModels, 0.0);

    int voteVirtual = 0;
    MultiLabelOutput virtualMLO = virtualLabelModels.makePrediction(instance);
    boolean[] virtualBipartition = virtualMLO.getBipartition();

    //number of classifiers of the first layer that forward the instance to the second layer
    int forwards = 0;

    for (int i = 0; i < numLabels; i++) {
        if (virtualMLO.hasConfidences()) {
            confidenceFromVirtualModels[i] = virtualMLO.getConfidences()[i];
            //System.out.print(confidenceFromVirtualModels[i]);
            //System.out.print("\t");
        }
        if (virtualBipartition[i]) {
            voteLabel[i]++;
            voteFromVirtualModels[i]++;
        } else {
            voteVirtual++;
        }

        if (confidenceFromVirtualModels[i] > threshold) {
            forwards++;
        }
    }

    int counter = 0;
    for (int label1 = 0; label1 < numLabels - 1; label1++) {
        for (int label2 = label1 + 1; label2 < numLabels; label2++) {
            Instance newInstanceFirstStage;
            //add predictions from the vurtual models
            if (instance instanceof SparseInstance) {
                newInstanceFirstStage = modifySparseInstance(instance, virtualMLO.getConfidences()[label1],
                        virtualMLO.getConfidences()[label2]);
            } else {
                newInstanceFirstStage = modifyDenseInstance(instance, virtualMLO.getConfidences()[label1],
                        virtualMLO.getConfidences()[label2]);
            }

            // delete all labels and add a new atribute at the end
            Instance newInstance = RemoveAllLabels.transformInstance(newInstanceFirstStage, labelIndices);
            newInstance.insertAttributeAt(newInstance.numAttributes());

            if (!nodata[counter]) {
                if (confidenceFromVirtualModels[label1] > threshold
                        && confidenceFromVirtualModels[label2] > threshold) {
                    double distribution[];
                    try {
                        newInstance.setDataset(metaDataTest[counter]);
                        distribution = oneVsOneModels[counter].distributionForInstance(newInstance);
                    } catch (Exception e) {
                        System.out.println(e);
                        return null;
                    }
                    int maxIndex = (distribution[0] > distribution[1]) ? 0 : 1;
                    // Ensure correct predictions both for class values {0,1} and {1,0}
                    Attribute classAttribute = metaDataTest[counter].classAttribute();

                    if (classAttribute.value(maxIndex).equals("1")) {
                        voteLabel[label1]++;
                    } else {
                        voteLabel[label2]++;
                    }
                } else if (confidenceFromVirtualModels[label1] > threshold) {
                    voteLabel[label1]++;
                } else if (confidenceFromVirtualModels[label2] > threshold) {
                    voteLabel[label2]++;
                } else {
                    noVoteLabel[label1]++;
                    noVoteLabel[label2]++;
                }
            }

            counter++;
        }

    }

    avgForwards += forwards;

    for (int i = 0; i < numLabels; i++) {
        if (voteLabel[i] >= voteVirtual) {
            bipartition[i] = true;
            confidences[i] = (1.0 * voteLabel[i]) / (numLabels - noVoteLabel[i]);
        } else {
            bipartition[i] = false;
            confidences[i] = 1.0 * confidenceFromVirtualModels[i] / numLabels;
            //confidences[i]=confidenceFromVirtualModels[i];
        }
        //System.out.println(bipartition[i]);
        //System.out.println(confidences[i]);
        //confidences[i]*=confidenceFromVirtualModels[i];
    }

    MultiLabelOutput mlo = new MultiLabelOutput(bipartition, confidences);
    return mlo;
}

From source file:mulan.classifier.transformation.TwoStagePrunedClassifierChainArchitecture.java

License:Open Source License

/**
 * Builds the training set for one label pair: an empty copy of the training
 * data's header gets two extra attributes inserted at the front (one per label
 * of the pair), and is then filled with every training instance after it has
 * been extended with the two precomputed confidences for that pair.
 *
 * @param trainingSet the original multi-label training data
 * @param label1 position of the first label in the confidences array
 * @param label2 position of the second label in the confidences array
 * @param predictions one prediction per training instance, in dataset order
 * @return the extended multi-label training data
 * @throws Exception if an instance transformation fails
 */
private MultiLabelInstances GenerateChain(MultiLabelInstances trainingSet, int label1, int label2,
        ArrayList<MultiLabelOutput> predictions) throws Exception {
    Instances source = trainingSet.getDataSet();
    int total = source.numInstances();

    MultiLabelInstances pairSet = new MultiLabelInstances(new Instances(source, total),
            trainingSet.getLabelsMetaData());
    Instances target = pairSet.getDataSet();

    // both insertions happen at position 0, so label2's attribute ends up first
    target.insertAttributeAt(new Attribute("0vs" + label1), 0);
    target.insertAttributeAt(new Attribute("0vs" + label2), 0);

    for (int row = 0; row < total; row++) {
        Instance current = source.instance(row);
        double first = predictions.get(row).getConfidences()[label1];
        double second = predictions.get(row).getConfidences()[label2];

        Instance transformed = (current instanceof SparseInstance)
                ? modifySparseInstance(current, first, second)
                : modifyDenseInstance(current, first, second);

        target.add(transformed);
        transformed.setDataset(target);
    }

    return pairSet;
}

From source file:mulan.data.ConverterLibSVM.java

License:Open Source License

/**
 * Converts a multi-label dataset from LibSVM format to the format
 * that is compatible with Mulan. It constructs one ARFF and one XML file.
 * <p>
 * The source file is read twice: a first pass determines the number of labels
 * (highest label id + 1, i.e. label ids are treated as 0-based) and the number
 * of attributes (highest attribute index, i.e. attribute indices are treated as
 * 1-based); a second pass writes the ARFF data. Rows are written in sparse
 * format when the mean number of attribute tokens per line is lower than the
 * number of attributes.
 * <p>
 * I/O errors are printed via {@code printStackTrace()} and not propagated.
 *
 * @param path the directory that contains the source file and will contain
 * the target files; it is concatenated directly with the file names
 * @param sourceFilename the name of the source file
 * @param targetFilestem the filestem for the target files (.arff and .xml)
 * @param relationName the relation name of the arff file that will be
 * constructed
 */
public static void convertFromLibSVM(String path, String sourceFilename, String targetFilestem,
        String relationName) {
    BufferedReader aReader = null;
    BufferedWriter aWriter = null;

    int numLabels = 0;
    int numAttributes = 0;
    int numInstances = 0;
    double meanParsedAttributes = 0;

    String line = null;
    try {
        // First pass: calculate number of labels and attributes
        aReader = new BufferedReader(new FileReader(path + sourceFilename));

        while ((line = aReader.readLine()) != null) {
            numInstances++;

            StringTokenizer strTok = new StringTokenizer(line, " ");
            while (strTok.hasMoreTokens()) {
                String token = strTok.nextToken();

                if (token.indexOf(":") == -1) {
                    // parse label info
                    StringTokenizer labelTok = new StringTokenizer(token, ",");
                    while (labelTok.hasMoreTokens()) {
                        int intLabel = Integer.parseInt(labelTok.nextToken());
                        if (intLabel > numLabels) {
                            numLabels = intLabel;
                        }
                    }
                } else {
                    // parse attribute info
                    meanParsedAttributes++;
                    StringTokenizer attrTok = new StringTokenizer(token, ":");
                    int intAttrIndex = Integer.parseInt(attrTok.nextToken());
                    if (intAttrIndex > numAttributes) {
                        numAttributes = intAttrIndex;
                    }
                }
            }
        }

        // Release the first-pass reader before the variable is reused for the
        // second pass; otherwise its file handle would never be closed.
        aReader.close();
        aReader = null;

        // label ids start at 0, so the count is the highest id plus one
        numLabels++;

        System.out.println("Number of attributes: " + numAttributes);
        System.out.println("Number of instances: " + numInstances);
        System.out.println("Number of classes: " + numLabels);

        System.out.println("Constructing XML file... ");
        LabelsMetaDataImpl meta = new LabelsMetaDataImpl();
        for (int label = 0; label < numLabels; label++) {
            meta.addRootNode(new LabelNodeImpl("Label" + (label + 1)));
        }

        String labelsFilePath = path + targetFilestem + ".xml";
        try {
            LabelsBuilder.dumpLabels(meta, labelsFilePath);
            System.out.println("Done!");
        } catch (LabelsBuilderException e) {
            // do not leave a partially written XML file behind
            File labelsFile = new File(labelsFilePath);
            if (labelsFile.exists()) {
                labelsFile.delete();
            }
            System.out.println("Construction of labels XML failed!");
        }

        meanParsedAttributes /= numInstances;
        boolean sparse = false;
        if (meanParsedAttributes < numAttributes) {
            sparse = true;
            System.out.println("Dataset is sparse.");
        }

        // Define Instances class to hold data
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>(numAttributes + numLabels);
        for (int i = 0; i < numAttributes; i++) {
            attInfo.add(new Attribute("Att" + (i + 1)));
        }
        ArrayList<String> classValues = new ArrayList<String>(2);
        classValues.add("0");
        classValues.add("1");
        for (int i = 0; i < numLabels; i++) {
            attInfo.add(new Attribute("Label" + (i + 1), classValues));
        }

        // Second pass: re-read file and convert into multi-label arff
        aWriter = new BufferedWriter(new FileWriter(path + targetFilestem + ".arff"));
        Instances data = new Instances(relationName, attInfo, 0);
        aWriter.write(data.toString());

        aReader = new BufferedReader(new FileReader(path + sourceFilename));

        while ((line = aReader.readLine()) != null) {
            // all values start at 0 (new arrays are zero-initialised)
            double[] attValues = new double[numAttributes + numLabels];

            Instance tempInstance = new DenseInstance(1, attValues);
            tempInstance.setDataset(data);

            // separate class info from attribute info
            StringTokenizer strTok = new StringTokenizer(line, " ");

            while (strTok.hasMoreTokens()) {
                String token = strTok.nextToken();

                if (token.indexOf(":") == -1) {
                    // parse label info: label attributes follow the feature attributes
                    StringTokenizer labelTok = new StringTokenizer(token, ",");
                    while (labelTok.hasMoreTokens()) {
                        int intLabel = Integer.parseInt(labelTok.nextToken());
                        tempInstance.setValue(numAttributes + intLabel, 1);
                    }
                } else {
                    // parse attribute info: "index:value" with a 1-based index
                    StringTokenizer attrTok = new StringTokenizer(token, ":");
                    String strAttrIndex = attrTok.nextToken();
                    String strAttrValue = attrTok.nextToken();
                    tempInstance.setValue(Integer.parseInt(strAttrIndex) - 1, Double.parseDouble(strAttrValue));
                }
            }

            if (sparse) {
                SparseInstance tempSparseInstance = new SparseInstance(tempInstance);
                aWriter.write(tempSparseInstance.toString() + "\n");
            } else {
                aWriter.write(tempInstance.toString() + "\n");
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // close each resource independently so that a failure on the reader
        // cannot prevent the writer from being flushed and closed
        try {
            if (aReader != null) {
                aReader.close();
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
        try {
            if (aWriter != null) {
                aWriter.close();
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}

From source file:mulan.regressor.transformation.RegressorChainSimple.java

License:Open Source License

/**
 * Predicts every target by walking the chain in order: each regressor is applied
 * to a working copy of the instance, and its prediction is written into that
 * copy before the next regressor in the chain is applied.
 *
 * @param instance the instance to predict; its values are not altered, but the
 *        class index of its dataset is changed while predicting
 * @return the predicted scores, positioned according to {@code labelIndices}
 * @throws Exception if a regressor fails to classify the instance
 */
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception {
    double[] predicted = new double[numLabels];

    // work on a copy so that the values of the passed instance stay untouched
    Instances header = instance.dataset();
    Instance working = DataUtils.createInstance(instance, instance.weight(), instance.toDoubleArray());

    for (int link = 0; link < numLabels; link++) {
        int target = chain[link];
        header.setClassIndex(target);
        working.setDataset(header);

        // locate the slot of this target in the scores array;
        // it stays 0 when no label index matches
        int slot = 0;
        int candidate = 0;
        while (candidate < numLabels) {
            if (labelIndices[candidate] == target) {
                slot = candidate;
                break;
            }
            candidate++;
        }

        predicted[slot] = chainRegressors[link].classifyInstance(working);
        // feed the prediction to the regressors further down the chain
        working.setValue(target, predicted[slot]);
    }

    return new MultiLabelOutput(predicted, true);
}

From source file:mulan.regressor.transformation.SingleTargetRegressor.java

License:Open Source License

/**
 * Predicts every target independently: for each label the class index of the
 * instance's dataset is pointed at that label and the matching single-target
 * regressor is asked for its prediction.
 *
 * @param instance the instance to predict; the class index of its dataset is
 *        changed while predicting
 * @return the predicted scores, one per label in {@code labelIndices} order
 * @throws Exception if a regressor fails to classify the instance
 */
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception {
    double[] predicted = new double[numLabels];
    Instances header = instance.dataset();

    int target = 0;
    while (target < numLabels) {
        header.setClassIndex(labelIndices[target]);
        instance.setDataset(header);
        predicted[target] = stRegressors[target].classifyInstance(instance);
        target++;
    }

    return new MultiLabelOutput(predicted, true);
}

From source file:mulan.transformations.BinaryRelevanceTransformation.java

License:Open Source License

/**
 * Remove all label attributes except labelToKeep
 * @param instance /*from w  w  w. ja v  a2 s  .  co m*/
 * @param labelToKeep 
 * @return transformed Instance
 */
public Instance transformInstance(Instance instance, int labelToKeep) {
    Instance newInstance = DataUtils.createInstance(instance, instance.numAttributes());
    newInstance.setDataset(null);
    int numPredictors = instance.numAttributes() - numOfLabels;
    int skipLabel = 0;
    for (int labelIndex = 0; labelIndex < numOfLabels; labelIndex++) {
        if (labelIndex == labelToKeep) {
            skipLabel++;
            continue;
        }
        newInstance.deleteAttributeAt(numPredictors + skipLabel);
    }
    return newInstance;
}

From source file:mulan.transformations.IncludeLabelsTransformation.java

License:Open Source License

/**
 * Transforms multi-label data into single-label binary data: every instance is
 * emitted once per label, without its label attributes but with two appended
 * attributes - "Label", holding the name of the label, and the binary class
 * "Class", holding whether the original instance had that label set to "1".
 * The label indices of the data are stored in {@code labelIndices}.
 *
 * @param mlData multi-label data
 * @return transformed instances
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    int numLabels = mlData.getNumLabels();
    labelIndices = mlData.getLabelIndices();

    // strip every label attribute
    Instances transformed = RemoveAllLabels.transformInstances(mlData);

    // append a nominal attribute whose values are the label names
    Instances original = mlData.getDataSet();
    ArrayList<String> labelNames = new ArrayList<String>(numLabels);
    for (int label = 0; label < numLabels; label++) {
        String name = original.attribute(labelIndices[label]).name();
        labelNames.add(name);
    }
    transformed.insertAttributeAt(new Attribute("Label", labelNames), transformed.numAttributes());

    // append the binary class attribute
    ArrayList<String> binaryValues = new ArrayList<String>(2);
    binaryValues.add("0");
    binaryValues.add("1");
    transformed.insertAttributeAt(new Attribute("Class", binaryValues), transformed.numAttributes());

    // keep only the header and make the last attribute the class
    transformed = new Instances(transformed, 0);
    transformed.setClassIndex(transformed.numAttributes() - 1);

    // one output instance per (input instance, label) combination
    Instances data = mlData.getDataSet();
    int rows = data.numInstances();
    for (int row = 0; row < rows; row++) {
        Instance sourceRow = data.instance(row);
        for (int label = 0; label < numLabels; label++) {
            Instance expanded = RemoveAllLabels.transformInstance(sourceRow, labelIndices);
            // detach while the two extra attributes are added, then attach to the new header
            expanded.setDataset(null);
            expanded.insertAttributeAt(expanded.numAttributes());
            expanded.insertAttributeAt(expanded.numAttributes());
            expanded.setDataset(transformed);

            expanded.setValue(expanded.numAttributes() - 2, labelNames.get(label));

            int labelAttribute = labelIndices[label];
            String nominal = data.attribute(labelAttribute).value((int) sourceRow.value(labelAttribute));
            expanded.setValue(expanded.numAttributes() - 1, nominal.equals("1") ? "1" : "0");

            transformed.add(expanded);
        }
    }

    return transformed;
}

From source file:mulan.transformations.IncludeLabelsTransformation.java

License:Open Source License

/**
 * Brings an unlabeled instance into the format expected by the binary
 * classifier: the label attributes are removed and two attributes are appended
 * at the end. The result is not attached to any dataset.
 *
 * @param instance an unlabeled instance
 * @return a transformed unlabeled instance, or {@code null} when the label
 *         indices have not been set yet
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
public Instance transformInstance(Instance instance) throws Exception {
    if (labelIndices != null) {
        Instance unlabeled = RemoveAllLabels.transformInstance(instance, labelIndices);
        unlabeled.setDataset(null);
        // two trailing attributes are appended
        for (int added = 0; added < 2; added++) {
            unlabeled.insertAttributeAt(unlabeled.numAttributes());
        }
        return unlabeled;
    }

    System.out.println("Label Indices not set!!");
    return null;
}

From source file:mulan.transformations.LabelPowersetTransformation.java

License:Open Source License

/**
 * Removes the label attributes from an instance, appends one attribute at the
 * end and attaches the result to {@code transformedFormat}.
 *
 * @param instance the instance to transform
 * @param labelIndices the indices of the label attributes to remove
 * @return the transformed instance
 * @throws Exception if the label removal fails
 */
public Instance transformInstance(Instance instance, int[] labelIndices) throws Exception {
    Instance result = RemoveAllLabels.transformInstance(instance, labelIndices);

    // detach while the structure changes, then attach to the transformed header
    result.setDataset(null);
    int appendPosition = result.numAttributes();
    result.insertAttributeAt(appendPosition);
    result.setDataset(transformedFormat);

    return result;
}