Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

This page collects usage examples for weka.core Instance setDataset.

Prototype

public void setDataset(Instances instances);

Document

Sets the reference to the dataset.
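
Before an Instance can resolve nominal or string values, or report its class attribute, it must hold a reference to an Instances object that supplies the schema; setDataset establishes that reference without adding the instance to the dataset. The following minimal sketch (a hypothetical example assuming the Weka 3.7+ API, where DenseInstance replaces the older Instance class; class and attribute names are illustrative) shows the typical order: build the header, call setDataset, then set values and add the instance.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetDatasetSketch {
    public static void main(String[] args) {
        // Build a tiny schema: one numeric attribute plus a nominal class attribute.
        final ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x"));
        final ArrayList<String> classValues = new ArrayList<>();
        classValues.add("a");
        classValues.add("b");
        attrs.add(new Attribute("class", classValues));

        final Instances dataset = new Instances("example", attrs, 0);
        dataset.setClassIndex(dataset.numAttributes() - 1);

        // A free-standing instance has no schema; setting a nominal value by its
        // label would fail until setDataset() attaches the header information.
        final Instance inst = new DenseInstance(2);
        inst.setDataset(dataset);
        inst.setValue(0, 1.5);
        inst.setValue(1, "a");

        // setDataset() only sets the reference; the instance still has to be added.
        dataset.add(inst);
        System.out.println(dataset);
    }
}

Most of the examples below follow the same pattern: construct the Instance, attach the schema with setDataset, and only then add it to a dataset or pass it to a classifier.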

Usage

From source file: de.upb.timok.utils.DatasetTransformationUtils.java

License: Open Source License

public static Instances trainingSetToInstances(List<double[]> trainingSet) {
    final double[] sample = trainingSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length + 1);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }

    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);

    // Declare the feature vector
    fvWekaAttributes.add(ClassAttribute);
    final Instances result = new Instances("trainingSet", fvWekaAttributes, trainingSet.size());
    result.setClass(ClassAttribute);
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : trainingSet) {
        final double[] newInstance = Arrays.copyOf(instance, instance.length + 1);
        newInstance[newInstance.length - 1] = 0;
        final Instance wekaInstance = new DenseInstance(1, newInstance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}

From source file: de.upb.timok.utils.DatasetTransformationUtils.java

License: Open Source License

public static Instances testSetToInstances(List<double[]> testSet) {
    if (testSet.size() == 0) {
        logger.warn("TestSet has size 0");
    }
    final double[] sample = testSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);
    fvWekaAttributes.add(ClassAttribute);

    // Declare the feature vector
    final Instances result = new Instances("testSet", fvWekaAttributes, testSet.size());
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : testSet) {
        final Instance wekaInstance = new DenseInstance(1, instance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}

From source file: edu.brandeis.wisedb.scheduler.training.decisiontree.DTSearcher.java

License: Open Source License

@Override
public List<Action> schedule(Set<ModelQuery> toSched) {
    SingularMachineState start = new SingularMachineState(toSched, qtp, sla);
    List<Action> toR = new LinkedList<Action>();

    applyLoop: while (!start.isGoalState()) {
        log.fine("Current state: " + start);

        SortedMap<String, String> features = start.getFeatures();
        Instance toClassify = new Instance(attributes.length);
        toClassify.setDataset(wekaDataSet);

        for (Attribute a : attributes) {
            if (a.name().equals("action")) {
                //toClassify.setValue(a, "N");
                continue;
            }

            try {

                if (features.get(a.name()).equals("?")) {
                    toClassify.setMissing(a);
                    continue;
                }
                try {
                    double d = Double.valueOf(features.get(a.name()));
                    toClassify.setValue(a, d);
                } catch (NumberFormatException e) {
                    toClassify.setValue(a, features.get(a.name()));
                }
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
                log.warning(
                        "Encountered previously unseen attribute value! Might need better training data... making random selection.");
                log.warning("Value for attribute " + a.name() + " was " + features.get(a.name()));
                Action rand = getPUAction(start);
                log.warning("Random action selected: " + rand);
                toR.add(rand);
                start.applyAction(rand);
                continue applyLoop;
            }
        }

        toClassify.setClassMissing();
        log.finer("Going to classify: " + toClassify);

        try {
            double d = tree.classifyInstance(toClassify);
            toClassify.setClassValue(d);
            String action = toClassify.stringValue(toClassify.classIndex());
            log.finer("Got action string: " + action);

            Action selected = null;
            for (Action a : start.getPossibleActions()) {
                if (actionMatches(a, action)) {
                    selected = a;
                    break;
                }
            }

            if (selected == null) {
                //log.warning("Could not find applicable action for string: " + action + " ... picking random action");
                Action a = getPUAction(start);
                start.applyAction(a);
                toR.add(a);
                continue;
            }

            log.fine("Selected action: " + selected);

            start.applyAction(selected);

            toR.add(selected);

        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            return null;
        }
    }

    return toR;
}

From source file: edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider.java

License: Apache License

@Override
public Map<String, Double> infer(Map<String, Double> features) throws AnalysisEngineProcessException {
    Instance instance = new SparseInstance(features.size());
    instance.setDataset(datasetSchema);
    for (Map.Entry<String, Double> e : features.entrySet()) {
        Attribute attribute = datasetSchema.attribute(e.getKey());
        if (attribute == null)
            continue;
        instance.setValue(attribute, e.getValue());
    }
    double[] probs;
    try {
        probs = classifier.distributionForInstance(instance);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    assert datasetSchema.classIndex() == probs.length;
    return IntStream.range(0, probs.length).boxed()
            .collect(toMap(i -> datasetSchema.attribute(i).name(), i -> probs[i]));
}

From source file: edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java

License: Open Source License

/**
 * Given a (possibly empty) Instances object containing the required weka Attributes, generates a weka Instance for a
 * single data point.
 *
 * @param instances  the weka Instances object containing attributes
 * @param data_point the data point to convert
 * @return a weka instance with assigned attributes
 */
protected static Instance assignWekaAttributes(Instances instances, Word data_point) {
    double[] instance = new double[instances.numAttributes()];

    for (int i = 0; i < instances.numAttributes(); ++i) {
        Attribute attribute = instances.attribute(i);
        if (data_point.hasAttribute(attribute.name())
                && !data_point.getAttribute(attribute.name()).toString().equals("?")) {
            switch (attribute.type()) {
            case Attribute.NOMINAL:
                int index = attribute.indexOfValue(data_point.getAttribute(attribute.name()).toString());
                instance[i] = (double) index;
                break;
            case Attribute.NUMERIC:
                // Check if value is really a number.
                try {
                    instance[i] = Double.valueOf(data_point.getAttribute(attribute.name()).toString());
                } catch (NumberFormatException e) {
                    AuToBIUtils.error("Number expected for feature: " + attribute.name());
                }
                break;
            case Attribute.STRING:
                instance[i] = attribute.addStringValue(data_point.getAttribute(attribute.name()).toString());
                break;
            default:
                AuToBIUtils.error("Unknown attribute type");
            }
        } else {
            instance[i] = Utils.missingValue();
        }
    }

    Instance inst = new DenseInstance(1, instance);
    inst.setDataset(instances);
    return inst;
}

From source file: edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java

License: Open Source License

/**
 * Creates a WEKA Instance object out of a {@link FeatureVector}.
 **/
private Instance makeInstance(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
        double[] labelValues) {
    // Make sure attributeInfo has been filled
    if (attributeInfo.size() == 0) {
        System.err.println("WekaWrapper: Error - makeInstance was called while attributeInfo " + "was empty.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    // Initialize an Instance object
    Instance inst = new Instance(attributeInfo.size());

    // Acknowledge that this instance will be a member of our dataset
    // 'instances'
    inst.setDataset(instances);

    // Assign values for its attributes
    /*
     * Since we are iterating through this example's feature list, which does not contain the
     * label feature (the label feature is the first in the 'attribute' list), we start attIndex
     * at 1, while we start featureIndex at 0.
     */
    for (int featureIndex = 0, attIndex = 1; featureIndex < exampleFeatures.length; ++featureIndex, ++attIndex) {
        Feature f = (Feature) lexicon.lookupKey(exampleFeatures[featureIndex]);
        Attribute att = (Attribute) attributeInfo.elementAt(attIndex);

        // make sure the feature's identifier and the attribute's name match
        if (!(att.name().equals(f.getStringIdentifier()))) {
            System.err.println(
                    "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair.");
            System.err.println(
                    "  " + att.name() + " and " + f.getStringIdentifier() + " should have been identical.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!f.isDiscrete())
            inst.setValue(attIndex, exampleValues[featureIndex]);
        else { // it's a discrete or conjunctive feature.
            String attValue = f.totalValues() == 2 ? att.value((int) exampleValues[featureIndex])
                    : f.getStringValue();
            inst.setValue(attIndex, attValue);
        }
    }

    /*
     * Here, we assume that if the labels FeatureVector is either empty or null,
     * then this example is to be considered unlabeled.
     */
    if (exampleLabels.length == 0) {
        inst.setClassMissing();
    } else if (exampleLabels.length > 1) {
        System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, ");
        new Exception().printStackTrace();
        System.exit(1);
    } else {
        Feature label = labelLexicon.lookupKey(exampleLabels[0]);

        // make sure the name of the label feature matches the name of the 0'th
        // attribute
        if (!(label.getStringIdentifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) {
            System.err.println("WekaWrapper: Error - makeInstance found the wrong label name.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!label.isDiscrete())
            inst.setValue(0, labelValues[0]);
        else
            inst.setValue(0, label.getStringValue());
    }

    return inst;
}

From source file: edu.illinois.cs.cogcomp.saul.learn.SaulWekaWrapper.java

License: Open Source License

/**
 * Creates a WEKA Instance object out of a {@link FeatureVector}.
 **/
private Instance makeInstance(LBJavaInstance instance) {

    // Initialize an Instance object
    Instance inst = new Instance(attributeInfo.size());

    // Acknowledge that this instance will be a member of our dataset 'wekaInstances'
    inst.setDataset(wekaInstances);

    // set all nominal feature values to 0, which means those features are not used in this example
    for (int i = 1; i < attributeInfo.size(); i++)
        if (inst.attribute(i).isNominal())
            inst.setValue(i, "0");

    // Assign values for its attributes
    /*
     * Since we are iterating through this example's feature list, which does not contain the
     * label feature (the label feature is the first in the 'attribute' list), we set attIndex
     * to instance.featureIndices[featureIndex] + 1, while featureIndex starts at 0.
     */
    for (int featureIndex = 0; featureIndex < instance.featureIndices.length; ++featureIndex) {
        int attIndex = instance.featureIndices[featureIndex] + 1;
        Feature f = lexicon.lookupKey(instance.featureIndices[featureIndex]);

        // if the feature does not exist, do nothing. this may occur in test set.
        if (f == null)
            continue;
        Attribute att = (Attribute) attributeInfo.elementAt(attIndex);

        // make sure the feature and the attribute match
        if (!(att.name().equals(f.toString()))) {
            System.err.println(
                    "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair.");
            System.err.println("  " + att.name() + " and " + f.toString() + " should have been identical.");
            new Exception().printStackTrace();
            System.exit(1);
        }
        if (f.isDiscrete())
            inst.setValue(attIndex, "1"); // this feature is used in this example so we set it to "1"
        else
            inst.setValue(attIndex, instance.featureValues[featureIndex]);

    }

    /*
     * Here, we assume that if the labels FeatureVector is either empty or null,
     * then this example is to be considered unlabeled.
     */
    if (instance.labelIndices.length == 0) {
        inst.setClassMissing();
    } else if (instance.labelIndices.length > 1) {
        System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, ");
        new Exception().printStackTrace();
        System.exit(1);
    } else {
        Feature label = labelLexicon.lookupKey(instance.labelIndices[0]);

        // make sure the label feature matches the name of the 0'th attribute
        if (!(label.getGeneratingClassifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) {
            System.err.println("WekaWrapper: Error - makeInstance found the wrong label name.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!label.isDiscrete())
            inst.setValue(0, instance.labelValues[0]);
        else
            inst.setValue(0, label.getStringValue());
    }

    return inst;
}

From source file: edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java

License: Open Source License

public static Instances transformInstances(final Instances src, final CoordinateTransform transform) {
    final ArrayList<Attribute> out_attributes = new ArrayList<Attribute>();
    for (int i = 0; i < transform.outDimension(); ++i) {
        out_attributes.add(new Attribute("x" + i));
    }
    out_attributes.add((Attribute) src.classAttribute().copy());
    final Instances out = new Instances(src.relationName() + "_" + transform.name(), out_attributes, 0);
    for (int i = 0; i < src.size(); ++i) {
        final Instance inst = src.get(i);
        final RealVector flat = new ArrayRealVector(WekaUtil.unlabeledFeatures(inst));
        final RealVector transformed_vector = transform.encode(flat).x;
        final double[] transformed = new double[transformed_vector.getDimension() + 1];
        for (int j = 0; j < transformed_vector.getDimension(); ++j) {
            transformed[j] = transformed_vector.getEntry(j);
        }
        transformed[transformed.length - 1] = inst.classValue();
        final Instance transformed_instance = new DenseInstance(inst.weight(), transformed);
        out.add(transformed_instance);
        transformed_instance.setDataset(out);
    }
    out.setClassIndex(out.numAttributes() - 1);
    return out;
}

From source file: edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java

License: Open Source License

/**
 * @param args
 * @throws IOException
 * @throws FileNotFoundException
 */
public static void main(final String[] args) throws FileNotFoundException, IOException {
    final String experiment_file = args[0];
    final File root_directory;
    if (args.length > 1) {
        root_directory = new File(args[1]);
    } else {
        root_directory = new File(".");
    }
    final CsvConfigurationParser csv_config = new CsvConfigurationParser(new FileReader(experiment_file));
    final String experiment_name = FilenameUtils.getBaseName(experiment_file);

    final File expr_directory = new File(root_directory, experiment_name);
    expr_directory.mkdirs();

    final Csv.Writer csv = new Csv.Writer(
            new PrintStream(new FileOutputStream(new File(expr_directory, "results.csv"))));
    final String[] parameter_headers = new String[] { "kpca.kernel", "kpca.rbf.sigma",
            "kpca.random_forest.Ntrees", "kpca.random_forest.max_depth", "kpca.Nbases", "multiclass.classifier",
            "multiclass.random_forest.Ntrees", "multiclass.random_forest.max_depth",
            "pairwise_classifier.max_branching", "training.label_noise" };
    csv.cell("domain").cell("abstraction");
    for (final String p : parameter_headers) {
        csv.cell(p);
    }
    csv.cell("Ntrain").cell("Ntest").cell("ami.mean").cell("ami.variance").cell("ami.confidence").newline();

    for (int expr = 0; expr < csv_config.size(); ++expr) {
        try {
            final KeyValueStore expr_config = csv_config.get(expr);
            final Configuration config = new Configuration(root_directory.getPath(), expr_directory.getName(),
                    expr_config);

            System.out.println("[Loading '" + config.training_data_single + "']");
            final Instances single = WekaUtil
                    .readLabeledDataset(new File(root_directory, config.training_data_single + ".arff"));

            final Instances train = new Instances(single, 0);
            final int[] idx = Fn.range(0, single.size());
            int instance_counter = 0;
            Fn.shuffle(config.rng, idx);
            final int Ntrain = config.getInt("Ntrain_games"); // TODO: Rename?
            final double label_noise = config.getDouble("training.label_noise");
            final int Nlabels = train.classAttribute().numValues();
            assert (Nlabels > 0);
            for (int i = 0; i < Ntrain; ++i) {
                final Instance inst = single.get(idx[instance_counter++]);
                if (label_noise > 0 && config.rng.nextDouble() < label_noise) {
                    int noisy_label = 0;
                    do {
                        noisy_label = config.rng.nextInt(Nlabels);
                    } while (noisy_label == (int) inst.classValue());
                    System.out.println("Noisy label (" + inst.classValue() + " -> " + noisy_label + ")");
                    inst.setClassValue(noisy_label);
                }
                train.add(inst);
                inst.setDataset(train);
            }

            final Fn.Function2<Boolean, Instance, Instance> plausible_p = createPlausiblePredicate(config);

            final int Ntest = config.Ntest_games;
            int Ntest_added = 0;
            final ArrayList<Instances> tests = new ArrayList<Instances>();
            while (instance_counter < single.size() && Ntest_added < Ntest) {
                final Instance inst = single.get(idx[instance_counter++]);
                boolean found = false;
                for (final Instances test : tests) {
                    // Note that 'plausible_p' should be transitive
                    if (plausible_p.apply(inst, test.get(0))) {
                        WekaUtil.addInstance(test, inst);
                        if (test.size() == 30) {
                            Ntest_added += test.size();
                        } else if (test.size() > 30) {
                            Ntest_added += 1;
                        }
                        found = true;
                        break;
                    }
                }

                if (!found) {
                    final Instances test = new Instances(single, 0);
                    WekaUtil.addInstance(test, inst);
                    tests.add(test);
                }
            }
            final Iterator<Instances> test_itr = tests.iterator();
            while (test_itr.hasNext()) {
                if (test_itr.next().size() < 30) {
                    test_itr.remove();
                }
            }
            System.out.println("=== tests.size() = " + tests.size());
            System.out.println("=== Ntest_added = " + Ntest_added);

            System.out.println("[Training]");
            final Evaluator evaluator = createEvaluator(config, train);
            //            final Instances transformed_test = evaluator.prepareInstances( test );

            System.out.println("[Evaluating]");

            final int Nxval = evaluator.isSensitiveToOrdering() ? 10 : 1;
            final MeanVarianceAccumulator ami = new MeanVarianceAccumulator();

            final MeanVarianceAccumulator errors = new MeanVarianceAccumulator();
            final MeanVarianceAccumulator relative_error = new MeanVarianceAccumulator();

            int c = 0;
            for (int xval = 0; xval < Nxval; ++xval) {
                for (final Instances test : tests) {
                    // TODO: Debugging
                    WekaUtil.writeDataset(new File(config.root_directory), "test_" + (c++), test);

                    //               transformed_test.randomize( new RandomAdaptor( config.rng ) );
                    //               final ClusterContingencyTable ct = evaluator.evaluate( transformed_test );
                    test.randomize(new RandomAdaptor(config.rng));
                    final ClusterContingencyTable ct = evaluator.evaluate(test);
                    System.out.println(ct);

                    int Nerrors = 0;
                    final MeanVarianceAccumulator mv = new MeanVarianceAccumulator();
                    for (int i = 0; i < ct.R; ++i) {
                        final int max = Fn.max(ct.n[i]);
                        Nerrors += (ct.a[i] - max);
                        mv.add(((double) ct.a[i]) / ct.N * Nerrors / ct.a[i]);
                    }
                    errors.add(Nerrors);
                    relative_error.add(mv.mean());

                    System.out.println("exemplar: " + test.get(0));
                    System.out.println("Nerrors = " + Nerrors);
                    final PrintStream ct_out = new PrintStream(
                            new FileOutputStream(new File(expr_directory, "ct_" + expr + "_" + xval + ".csv")));
                    ct.writeCsv(ct_out);
                    ct_out.close();
                    final double ct_ami = ct.adjustedMutualInformation_max();
                    if (Double.isNaN(ct_ami)) {
                        System.out.println("! ct_ami = NaN");
                    } else {
                        ami.add(ct_ami);
                    }
                    System.out.println();
                }
            }
            System.out.println("errors = " + errors.mean() + " (" + errors.confidence() + ")");
            System.out.println(
                    "relative_error = " + relative_error.mean() + " (" + relative_error.confidence() + ")");
            System.out.println("AMI_max = " + ami.mean() + " (" + ami.confidence() + ")");

            csv.cell(config.domain).cell(config.get("abstraction.discovery"));
            for (final String p : parameter_headers) {
                csv.cell(config.get(p));
            }
            csv.cell(Ntrain).cell(Ntest).cell(ami.mean()).cell(ami.variance()).cell(ami.confidence()).newline();
        } catch (final Exception ex) {
            ex.printStackTrace();
        }
    }
}

From source file: edu.oregonstate.eecs.mcplan.abstraction.MulticlassRepresenter.java

License: Open Source License

@Override
public Representation<S> encode(final FactoredRepresentation<S> x) {
    try {
        final Instance i = WekaUtil.labeledInstanceFromUnlabeledFeatures(headers_, x.phi());
        headers_.add(i);
        i.setDataset(headers_);
        final int c = (int) classifier_.classifyInstance(i);
        headers_.remove(0);
        return new ClusterAbstraction<S>(c);
    } catch (final RuntimeException ex) {
        throw ex;
    } catch (final Exception ex) {
        throw new RuntimeException(ex);
    }
}