Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

This page collects usage examples for weka.core Instance setDataset.

Prototype

public void setDataset(Instances instances);

Document

Sets the reference to the dataset.
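
Before an Instance can resolve nominal or string values, or report its class attribute, it must hold a reference to an Instances object that supplies the schema; setDataset establishes that reference without adding the instance to the dataset. The following minimal sketch (a hypothetical example assuming the Weka 3.7+ API, where DenseInstance replaces the older Instance class; class and attribute names are illustrative) shows the typical order: build the header, call setDataset, then set values and add the instance.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetDatasetSketch {
    public static void main(String[] args) {
        // Build a tiny schema: one numeric attribute plus a nominal class attribute.
        final ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x"));
        final ArrayList<String> classValues = new ArrayList<>();
        classValues.add("a");
        classValues.add("b");
        attrs.add(new Attribute("class", classValues));

        final Instances dataset = new Instances("example", attrs, 0);
        dataset.setClassIndex(dataset.numAttributes() - 1);

        // A free-standing instance has no schema; setting a nominal value by its
        // label would fail until setDataset() attaches the header information.
        final Instance inst = new DenseInstance(2);
        inst.setDataset(dataset);
        inst.setValue(0, 1.5);
        inst.setValue(1, "a");

        // setDataset() only sets the reference; the instance still has to be added.
        dataset.add(inst);
        System.out.println(dataset);
    }
}

Most of the examples below follow the same pattern: construct the Instance, attach the schema with setDataset, and only then add it to a dataset or pass it to a classifier.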

Usage

From source file: de.upb.timok.utils.DatasetTransformationUtils.java

License: Open Source License

public static Instances trainingSetToInstances(List<double[]> trainingSet) {
    final double[] sample = trainingSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length + 1);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }

    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);

    // Declare the feature vector
    fvWekaAttributes.add(ClassAttribute);
    final Instances result = new Instances("trainingSet", fvWekaAttributes, trainingSet.size());
    result.setClass(ClassAttribute);
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : trainingSet) {
        final double[] newInstance = Arrays.copyOf(instance, instance.length + 1);
        newInstance[newInstance.length - 1] = 0;
        final Instance wekaInstance = new DenseInstance(1, newInstance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}

From source file: de.upb.timok.utils.DatasetTransformationUtils.java

License: Open Source License

public static Instances testSetToInstances(List<double[]> testSet) {
    if (testSet.size() == 0) {
        logger.warn("TestSet has size 0");
    }
    final double[] sample = testSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);
    fvWekaAttributes.add(ClassAttribute);

    // Declare the feature vector
    final Instances result = new Instances("testSet", fvWekaAttributes, testSet.size());
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : testSet) {
        final Instance wekaInstance = new DenseInstance(1, instance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}

From source file: edu.brandeis.wisedb.scheduler.training.decisiontree.DTSearcher.java

License: Open Source License

@Override
public List<Action> schedule(Set<ModelQuery> toSched) {
    SingularMachineState start = new SingularMachineState(toSched, qtp, sla);
    List<Action> toR = new LinkedList<Action>();

    applyLoop: while (!start.isGoalState()) {
        log.fine("Current state: " + start);

        SortedMap<String, String> features = start.getFeatures();
        Instance toClassify = new Instance(attributes.length);
        toClassify.setDataset(wekaDataSet);

        for (Attribute a : attributes) {
            if (a.name().equals("action")) {
                //toClassify.setValue(a, "N");
                continue;
            }

            try {

                if (features.get(a.name()).equals("?")) {
                    toClassify.setMissing(a);
                    continue;
                }
                try {
                    double d = Double.valueOf(features.get(a.name()));
                    toClassify.setValue(a, d);
                } catch (NumberFormatException e) {
                    toClassify.setValue(a, features.get(a.name()));
                }
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
                log.warning(
                        "Encountered previously unseen attribute value! Might need better training data... making random selection.");
                log.warning("Value for attribute " + a.name() + " was " + features.get(a.name()));
                Action rand = getPUAction(start);
                log.warning("Random action selected: " + rand);
                toR.add(rand);
                start.applyAction(rand);
                continue applyLoop;
            }
        }

        toClassify.setClassMissing();
        log.finer("Going to classify: " + toClassify);

        try {
            double d = tree.classifyInstance(toClassify);
            toClassify.setClassValue(d);
            String action = toClassify.stringValue(toClassify.classIndex());
            log.finer("Got action string: " + action);

            Action selected = null;
            for (Action a : start.getPossibleActions()) {
                if (actionMatches(a, action)) {
                    selected = a;
                    break;
                }
            }

            if (selected == null) {
                //log.warning("Could not find applicable action for string: " + action + " ... picking random action");
                Action a = getPUAction(start);
                start.applyAction(a);
                toR.add(a);
                continue;
            }

            log.fine("Selected action: " + selected);

            start.applyAction(selected);

            toR.add(selected);

        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            return null;
        }
    }

    return toR;
}

From source file: edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider.java

License: Apache License

@Override
public Map<String, Double> infer(Map<String, Double> features) throws AnalysisEngineProcessException {
    Instance instance = new SparseInstance(features.size());
    instance.setDataset(datasetSchema);
    for (Map.Entry<String, Double> e : features.entrySet()) {
        Attribute attribute = datasetSchema.attribute(e.getKey());
        if (attribute == null)
            continue;
        instance.setValue(attribute, e.getValue());
    }
    double[] probs;
    try {
        probs = classifier.distributionForInstance(instance);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    assert datasetSchema.classIndex() == probs.length;
    return IntStream.range(0, probs.length).boxed()
            .collect(toMap(i -> datasetSchema.attribute(i).name(), i -> probs[i]));
}

From source file: edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java

License: Open Source License

/**
 * Given a (possibly empty) Instances object containing the required weka Attributes, generates a weka Instance for a
 * single data point.
 *
 * @param instances  the weka Instances object containing attributes
 * @param data_point the data point to convert
 * @return a weka instance with assigned attributes
 */
protected static Instance assignWekaAttributes(Instances instances, Word data_point) {
    double[] instance = new double[instances.numAttributes()];

    for (int i = 0; i < instances.numAttributes(); ++i) {
        Attribute attribute = instances.attribute(i);
        if (data_point.hasAttribute(attribute.name())
                && !data_point.getAttribute(attribute.name()).toString().equals("?")) {
            switch (attribute.type()) {
            case Attribute.NOMINAL:
                int index = attribute.indexOfValue(data_point.getAttribute(attribute.name()).toString());
                instance[i] = (double) index;
                break;
            case Attribute.NUMERIC:
                // Check if value is really a number.
                try {
                    instance[i] = Double.valueOf(data_point.getAttribute(attribute.name()).toString());
                } catch (NumberFormatException e) {
                    AuToBIUtils.error("Number expected for feature: " + attribute.name());
                }
                break;
            case Attribute.STRING:
                instance[i] = attribute.addStringValue(data_point.getAttribute(attribute.name()).toString());
                break;
            default:
                AuToBIUtils.error("Unknown attribute type");
            }
        } else {
            instance[i] = Utils.missingValue();
        }
    }

    Instance inst = new DenseInstance(1, instance);
    inst.setDataset(instances);
    return inst;
}

From source file: edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java

License: Open Source License

/**
 * Creates a WEKA Instance object out of a {@link FeatureVector}.
 **/
private Instance makeInstance(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
        double[] labelValues) {
    // Make sure attributeInfo has been filled
    if (attributeInfo.size() == 0) {
        System.err.println("WekaWrapper: Error - makeInstance was called while attributeInfo " + "was empty.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    // Initialize an Instance object
    Instance inst = new Instance(attributeInfo.size());

    // Acknowledge that this instance will be a member of our dataset
    // 'instances'
    inst.setDataset(instances);

    // Assign values for its attributes
    /*
     * Since we are iterating through this example's feature list, which does not contain the
     * label feature (the label feature is the first in the 'attribute' list), we start attIndex
     * at 1, while we start featureIndex at 0.
     */
    for (int featureIndex = 0, attIndex = 1; featureIndex < exampleFeatures.length; ++featureIndex, ++attIndex) {
        Feature f = (Feature) lexicon.lookupKey(exampleFeatures[featureIndex]);
        Attribute att = (Attribute) attributeInfo.elementAt(attIndex);

        // make sure the feature's identifier and the attribute's name match
        if (!(att.name().equals(f.getStringIdentifier()))) {
            System.err.println(
                    "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair.");
            System.err.println(
                    "  " + att.name() + " and " + f.getStringIdentifier() + " should have been identical.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!f.isDiscrete())
            inst.setValue(attIndex, exampleValues[featureIndex]);
        else { // it's a discrete or conjunctive feature.
            String attValue = f.totalValues() == 2 ? att.value((int) exampleValues[featureIndex])
                    : f.getStringValue();
            inst.setValue(attIndex, attValue);
        }
    }

    /*
     * Here, we assume that if the labels FeatureVector is either empty or null,
     * then this example is to be considered unlabeled.
     */
    if (exampleLabels.length == 0) {
        inst.setClassMissing();
    } else if (exampleLabels.length > 1) {
        System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, ");
        new Exception().printStackTrace();
        System.exit(1);
    } else {
        Feature label = labelLexicon.lookupKey(exampleLabels[0]);

        // make sure the name of the label feature matches the name of the 0'th
        // attribute
        if (!(label.getStringIdentifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) {
            System.err.println("WekaWrapper: Error - makeInstance found the wrong label name.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!label.isDiscrete())
            inst.setValue(0, labelValues[0]);
        else
            inst.setValue(0, label.getStringValue());
    }

    return inst;
}

From source file: edu.illinois.cs.cogcomp.saul.learn.SaulWekaWrapper.java

License: Open Source License

/**
 * Creates a WEKA Instance object out of a {@link FeatureVector}.
 **/
private Instance makeInstance(LBJavaInstance instance) {

    // Initialize an Instance object
    Instance inst = new Instance(attributeInfo.size());

    // Acknowledge that this instance will be a member of our dataset 'wekaInstances'
    inst.setDataset(wekaInstances);

    // set all nominal feature values to 0, which means those features are not used in this example
    for (int i = 1; i < attributeInfo.size(); i++)
        if (inst.attribute(i).isNominal())
            inst.setValue(i, "0");

    // Assign values for its attributes
    /*
     * Since we are iterating through this example's feature list, which does not contain the
     * label feature (the label feature is the first in the 'attribute' list), we set attIndex
     * to instance.featureIndices[featureIndex] + 1, while featureIndex starts at 0.
     */
    for (int featureIndex = 0; featureIndex < instance.featureIndices.length; ++featureIndex) {
        int attIndex = instance.featureIndices[featureIndex] + 1;
        Feature f = lexicon.lookupKey(instance.featureIndices[featureIndex]);

        // if the feature does not exist, do nothing. this may occur in test set.
        if (f == null)
            continue;
        Attribute att = (Attribute) attributeInfo.elementAt(attIndex);

        // make sure the feature and the attribute match
        if (!(att.name().equals(f.toString()))) {
            System.err.println(
                    "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair.");
            System.err.println("  " + att.name() + " and " + f.toString() + " should have been identical.");
            new Exception().printStackTrace();
            System.exit(1);
        }
        if (f.isDiscrete())
            inst.setValue(attIndex, "1"); // this feature is used in this example so we set it to "1"
        else
            inst.setValue(attIndex, instance.featureValues[featureIndex]);

    }

    /*
     * Here, we assume that if the labels FeatureVector is either empty or null,
     * then this example is to be considered unlabeled.
     */
    if (instance.labelIndices.length == 0) {
        inst.setClassMissing();
    } else if (instance.labelIndices.length > 1) {
        System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, ");
        new Exception().printStackTrace();
        System.exit(1);
    } else {
        Feature label = labelLexicon.lookupKey(instance.labelIndices[0]);

        // make sure the label feature matches the name of the 0'th attribute
        if (!(label.getGeneratingClassifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) {
            System.err.println("WekaWrapper: Error - makeInstance found the wrong label name.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!label.isDiscrete())
            inst.setValue(0, instance.labelValues[0]);
        else
            inst.setValue(0, label.getStringValue());
    }

    return inst;
}

From source file: edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java

License: Open Source License

public static Instances transformInstances(final Instances src, final CoordinateTransform transform) {
    final ArrayList<Attribute> out_attributes = new ArrayList<Attribute>();
    for (int i = 0; i < transform.outDimension(); ++i) {
        out_attributes.add(new Attribute("x" + i));
    }
    out_attributes.add((Attribute) src.classAttribute().copy());
    final Instances out = new Instances(src.relationName() + "_" + transform.name(), out_attributes, 0);
    for (int i = 0; i < src.size(); ++i) {
        final Instance inst = src.get(i);
        final RealVector flat = new ArrayRealVector(WekaUtil.unlabeledFeatures(inst));
        final RealVector transformed_vector = transform.encode(flat).x;
        final double[] transformed = new double[transformed_vector.getDimension() + 1];
        for (int j = 0; j < transformed_vector.getDimension(); ++j) {
            transformed[j] = transformed_vector.getEntry(j);
        }
        transformed[transformed.length - 1] = inst.classValue();
        final Instance transformed_instance = new DenseInstance(inst.weight(), transformed);
        out.add(transformed_instance);
        transformed_instance.setDataset(out);
    }
    out.setClassIndex(out.numAttributes() - 1);
    return out;
}

From source file: edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java

License: Open Source License

/**
 * @param args
 * @throws IOException
 * @throws FileNotFoundException
 */
public static void main(final String[] args) throws FileNotFoundException, IOException {
    final String experiment_file = args[0];
    final File root_directory;
    if (args.length > 1) {
        root_directory = new File(args[1]);
    } else {
        root_directory = new File(".");
    }
    final CsvConfigurationParser csv_config = new CsvConfigurationParser(new FileReader(experiment_file));
    final String experiment_name = FilenameUtils.getBaseName(experiment_file);

    final File expr_directory = new File(root_directory, experiment_name);
    expr_directory.mkdirs();

    final Csv.Writer csv = new Csv.Writer(
            new PrintStream(new FileOutputStream(new File(expr_directory, "results.csv"))));
    final String[] parameter_headers = new String[] { "kpca.kernel", "kpca.rbf.sigma",
            "kpca.random_forest.Ntrees", "kpca.random_forest.max_depth", "kpca.Nbases", "multiclass.classifier",
            "multiclass.random_forest.Ntrees", "multiclass.random_forest.max_depth",
            "pairwise_classifier.max_branching", "training.label_noise" };
    csv.cell("domain").cell("abstraction");
    for (final String p : parameter_headers) {
        csv.cell(p);
    }
    csv.cell("Ntrain").cell("Ntest").cell("ami.mean").cell("ami.variance").cell("ami.confidence").newline();

    for (int expr = 0; expr < csv_config.size(); ++expr) {
        try {
            final KeyValueStore expr_config = csv_config.get(expr);
            final Configuration config = new Configuration(root_directory.getPath(), expr_directory.getName(),
                    expr_config);

            System.out.println("[Loading '" + config.training_data_single + "']");
            final Instances single = WekaUtil
                    .readLabeledDataset(new File(root_directory, config.training_data_single + ".arff"));

            final Instances train = new Instances(single, 0);
            final int[] idx = Fn.range(0, single.size());
            int instance_counter = 0;
            Fn.shuffle(config.rng, idx);
            final int Ntrain = config.getInt("Ntrain_games"); // TODO: Rename?
            final double label_noise = config.getDouble("training.label_noise");
            final int Nlabels = train.classAttribute().numValues();
            assert (Nlabels > 0);
            for (int i = 0; i < Ntrain; ++i) {
                final Instance inst = single.get(idx[instance_counter++]);
                if (label_noise > 0 && config.rng.nextDouble() < label_noise) {
                    int noisy_label = 0;
                    do {
                        noisy_label = config.rng.nextInt(Nlabels);
                    } while (noisy_label == (int) inst.classValue());
                    System.out.println("Noisy label (" + inst.classValue() + " -> " + noisy_label + ")");
                    inst.setClassValue(noisy_label);
                }
                train.add(inst);
                inst.setDataset(train);
            }

            final Fn.Function2<Boolean, Instance, Instance> plausible_p = createPlausiblePredicate(config);

            final int Ntest = config.Ntest_games;
            int Ntest_added = 0;
            final ArrayList<Instances> tests = new ArrayList<Instances>();
            while (instance_counter < single.size() && Ntest_added < Ntest) {
                final Instance inst = single.get(idx[instance_counter++]);
                boolean found = false;
                for (final Instances test : tests) {
                    // Note that 'plausible_p' should be transitive
                    if (plausible_p.apply(inst, test.get(0))) {
                        WekaUtil.addInstance(test, inst);
                        if (test.size() == 30) {
                            Ntest_added += test.size();
                        } else if (test.size() > 30) {
                            Ntest_added += 1;
                        }
                        found = true;
                        break;
                    }
                }

                if (!found) {
                    final Instances test = new Instances(single, 0);
                    WekaUtil.addInstance(test, inst);
                    tests.add(test);
                }
            }
            final Iterator<Instances> test_itr = tests.iterator();
            while (test_itr.hasNext()) {
                if (test_itr.next().size() < 30) {
                    test_itr.remove();
                }
            }
            System.out.println("=== tests.size() = " + tests.size());
            System.out.println("=== Ntest_added = " + Ntest_added);

            System.out.println("[Training]");
            final Evaluator evaluator = createEvaluator(config, train);
            //            final Instances transformed_test = evaluator.prepareInstances( test );

            System.out.println("[Evaluating]");

            final int Nxval = evaluator.isSensitiveToOrdering() ? 10 : 1;
            final MeanVarianceAccumulator ami = new MeanVarianceAccumulator();

            final MeanVarianceAccumulator errors = new MeanVarianceAccumulator();
            final MeanVarianceAccumulator relative_error = new MeanVarianceAccumulator();

            int c = 0;
            for (int xval = 0; xval < Nxval; ++xval) {
                for (final Instances test : tests) {
                    // TODO: Debugging
                    WekaUtil.writeDataset(new File(config.root_directory), "test_" + (c++), test);

                    //               transformed_test.randomize( new RandomAdaptor( config.rng ) );
                    //               final ClusterContingencyTable ct = evaluator.evaluate( transformed_test );
                    test.randomize(new RandomAdaptor(config.rng));
                    final ClusterContingencyTable ct = evaluator.evaluate(test);
                    System.out.println(ct);

                    int Nerrors = 0;
                    final MeanVarianceAccumulator mv = new MeanVarianceAccumulator();
                    for (int i = 0; i < ct.R; ++i) {
                        final int max = Fn.max(ct.n[i]);
                        Nerrors += (ct.a[i] - max);
                        mv.add(((double) ct.a[i]) / ct.N * Nerrors / ct.a[i]);
                    }
                    errors.add(Nerrors);
                    relative_error.add(mv.mean());

                    System.out.println("exemplar: " + test.get(0));
                    System.out.println("Nerrors = " + Nerrors);
                    final PrintStream ct_out = new PrintStream(
                            new FileOutputStream(new File(expr_directory, "ct_" + expr + "_" + xval + ".csv")));
                    ct.writeCsv(ct_out);
                    ct_out.close();
                    final double ct_ami = ct.adjustedMutualInformation_max();
                    if (Double.isNaN(ct_ami)) {
                        System.out.println("! ct_ami = NaN");
                    } else {
                        ami.add(ct_ami);
                    }
                    System.out.println();
                }
            }
            System.out.println("errors = " + errors.mean() + " (" + errors.confidence() + ")");
            System.out.println(
                    "relative_error = " + relative_error.mean() + " (" + relative_error.confidence() + ")");
            System.out.println("AMI_max = " + ami.mean() + " (" + ami.confidence() + ")");

            csv.cell(config.domain).cell(config.get("abstraction.discovery"));
            for (final String p : parameter_headers) {
                csv.cell(config.get(p));
            }
            csv.cell(Ntrain).cell(Ntest).cell(ami.mean()).cell(ami.variance()).cell(ami.confidence()).newline();
        } catch (final Exception ex) {
            ex.printStackTrace();
        }
    }
}

From source file: edu.oregonstate.eecs.mcplan.abstraction.MulticlassRepresenter.java

License: Open Source License

@Override
public Representation<S> encode(final FactoredRepresentation<S> x) {
    try {
        final Instance i = WekaUtil.labeledInstanceFromUnlabeledFeatures(headers_, x.phi());
        headers_.add(i);
        i.setDataset(headers_);
        final int c = (int) classifier_.classifyInstance(i);
        headers_.remove(0);
        return new ClusterAbstraction<S>(c);
    } catch (final RuntimeException ex) {
        throw ex;
    } catch (final Exception ex) {
        throw new RuntimeException(ex);
    }
}