List of usage examples for weka.core Instance setDataset
public void setDataset(Instances instances);
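A minimal, self-contained sketch of the typical call pattern (this snippet is not taken from any of the source files below; the relation name, attribute names, and class values are illustrative). setDataset only gives the instance access to the dataset's attribute metadata (types, nominal values, class index); it does not add the instance to the dataset, so a separate Instances.add call is still needed.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetDatasetSketch {
    public static void main(String[] args) {
        // Build the dataset header: two numeric attributes and a nominal class attribute.
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x1"));
        attrs.add(new Attribute("x2"));
        ArrayList<String> classValues = new ArrayList<>();
        classValues.add("yes");
        classValues.add("no");
        attrs.add(new Attribute("class", classValues));

        Instances data = new Instances("sketch", attrs, 0);
        data.setClassIndex(data.numAttributes() - 1);

        // A free-standing instance has no attribute information until it is
        // associated with a dataset header via setDataset.
        Instance inst = new DenseInstance(3);
        inst.setDataset(data);
        inst.setValue(attrs.get(0), 1.5);
        inst.setValue(attrs.get(1), -0.25);
        inst.setValue(attrs.get(2), "yes");

        // setDataset does not insert the instance; add it explicitly.
        data.add(inst);
        System.out.println(data);
    }
}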
From source file:de.upb.timok.utils.DatasetTransformationUtils.java
License:Open Source License
public static Instances trainingSetToInstances(List<double[]> trainingSet) {
    final double[] sample = trainingSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length + 1);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);
    // Declare the feature vector
    fvWekaAttributes.add(ClassAttribute);
    final Instances result = new Instances("trainingSet", fvWekaAttributes, trainingSet.size());
    result.setClass(ClassAttribute);
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : trainingSet) {
        final double[] newInstance = Arrays.copyOf(instance, instance.length + 1);
        newInstance[newInstance.length - 1] = 0;
        final Instance wekaInstance = new DenseInstance(1, newInstance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}
From source file:de.upb.timok.utils.DatasetTransformationUtils.java
License:Open Source License
public static Instances testSetToInstances(List<double[]> testSet) {
    if (testSet.size() == 0) {
        logger.warn("TestSet has size 0");
    }
    final double[] sample = testSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);
    fvWekaAttributes.add(ClassAttribute);
    // Declare the feature vector
    final Instances result = new Instances("testSet", fvWekaAttributes, testSet.size());
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : testSet) {
        final Instance wekaInstance = new DenseInstance(1, instance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}
From source file:edu.brandeis.wisedb.scheduler.training.decisiontree.DTSearcher.java
License:Open Source License
@Override
public List<Action> schedule(Set<ModelQuery> toSched) {
    SingularMachineState start = new SingularMachineState(toSched, qtp, sla);
    List<Action> toR = new LinkedList<Action>();

    applyLoop: while (!start.isGoalState()) {
        log.fine("Current state: " + start);
        SortedMap<String, String> features = start.getFeatures();
        Instance toClassify = new Instance(attributes.length);
        toClassify.setDataset(wekaDataSet);

        for (Attribute a : attributes) {
            if (a.name().equals("action")) {
                //toClassify.setValue(a, "N");
                continue;
            }
            try {
                if (features.get(a.name()).equals("?")) {
                    toClassify.setMissing(a);
                    continue;
                }
                try {
                    double d = Double.valueOf(features.get(a.name()));
                    toClassify.setValue(a, d);
                } catch (NumberFormatException e) {
                    toClassify.setValue(a, features.get(a.name()));
                }
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
                log.warning(
                        "Encountered previously unseen attribute value! Might need better training data... making random selection.");
                log.warning("Value for attribute " + a.name() + " was " + features.get(a.name()));
                Action rand = getPUAction(start);
                log.warning("Random action selected: " + rand);
                toR.add(rand);
                start.applyAction(rand);
                continue applyLoop;
            }
        }

        toClassify.setClassMissing();
        log.finer("Going to classify: " + toClassify);

        try {
            double d = tree.classifyInstance(toClassify);
            toClassify.setClassValue(d);
            String action = toClassify.stringValue(toClassify.classIndex());
            log.finer("Got action string: " + action);

            Action selected = null;
            for (Action a : start.getPossibleActions()) {
                if (actionMatches(a, action)) {
                    selected = a;
                    break;
                }
            }

            if (selected == null) {
                //log.warning("Could not find applicable action for string: " + action + " ... picking random action");
                Action a = getPUAction(start);
                start.applyAction(a);
                toR.add(a);
                continue;
            }

            log.fine("Selected action: " + selected);
            start.applyAction(selected);
            toR.add(selected);
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            return null;
        }
    }
    return toR;
}
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider.java
License:Apache License
@Override
public Map<String, Double> infer(Map<String, Double> features) throws AnalysisEngineProcessException {
    Instance instance = new SparseInstance(features.size());
    instance.setDataset(datasetSchema);
    for (Map.Entry<String, Double> e : features.entrySet()) {
        Attribute attribute = datasetSchema.attribute(e.getKey());
        if (attribute == null)
            continue;
        instance.setValue(attribute, e.getValue());
    }
    double[] probs;
    try {
        probs = classifier.distributionForInstance(instance);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    assert datasetSchema.classIndex() == probs.length;
    return IntStream.range(0, probs.length).boxed()
            .collect(toMap(i -> datasetSchema.attribute(i).name(), i -> probs[i]));
}
From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java
License:Open Source License
/**
 * Given a (possibly empty) Instances object containing the required weka Attributes, generates a weka Instance for a
 * single data point.
 *
 * @param instances  the weka Instances object containing attributes
 * @param data_point the data point to convert
 * @return a weka instance with assigned attributes
 */
protected static Instance assignWekaAttributes(Instances instances, Word data_point) {
    double[] instance = new double[instances.numAttributes()];

    for (int i = 0; i < instances.numAttributes(); ++i) {
        Attribute attribute = instances.attribute(i);
        if (data_point.hasAttribute(attribute.name())
                && !data_point.getAttribute(attribute.name()).toString().equals("?")) {
            switch (attribute.type()) {
            case Attribute.NOMINAL:
                int index = attribute.indexOfValue(data_point.getAttribute(attribute.name()).toString());
                instance[i] = (double) index;
                break;
            case Attribute.NUMERIC:
                // Check if value is really a number.
                try {
                    instance[i] = Double.valueOf(data_point.getAttribute(attribute.name()).toString());
                } catch (NumberFormatException e) {
                    AuToBIUtils.error("Number expected for feature: " + attribute.name());
                }
                break;
            case Attribute.STRING:
                instance[i] = attribute.addStringValue(data_point.getAttribute(attribute.name()).toString());
                break;
            default:
                AuToBIUtils.error("Unknown attribute type");
            }
        } else {
            instance[i] = Utils.missingValue();
        }
    }

    Instance inst = new DenseInstance(1, instance);
    inst.setDataset(instances);
    return inst;
}
From source file:edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java
License:Open Source License
/**
 * Creates a WEKA Instance object out of a {@link FeatureVector}.
 **/
private Instance makeInstance(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
        double[] labelValues) {
    // Make sure attributeInfo has been filled
    if (attributeInfo.size() == 0) {
        System.err.println("WekaWrapper: Error - makeInstance was called while attributeInfo " + "was empty.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    // Initialize an Instance object
    Instance inst = new Instance(attributeInfo.size());

    // Acknowledge that this instance will be a member of our dataset
    // 'instances'
    inst.setDataset(instances);

    // Assign values for its attributes
    /*
     * Since we are iterating through this example's feature list, which does not contain the
     * label feature (the label feature is the first in the 'attribute' list), we start attIndex
     * at 1, while we start featureIndex at 0.
     */
    for (int featureIndex = 0, attIndex = 1; featureIndex < exampleFeatures.length; ++featureIndex, ++attIndex) {
        Feature f = (Feature) lexicon.lookupKey(exampleFeatures[featureIndex]);
        Attribute att = (Attribute) attributeInfo.elementAt(attIndex);

        // make sure the feature's identifier and the attribute's name match
        if (!(att.name().equals(f.getStringIdentifier()))) {
            System.err.println(
                    "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair.");
            System.err.println(
                    " " + att.name() + " and " + f.getStringIdentifier() + " should have been identical.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!f.isDiscrete())
            inst.setValue(attIndex, exampleValues[featureIndex]);
        else {
            // it's a discrete or conjunctive feature.
            String attValue = f.totalValues() == 2 ? att.value((int) exampleValues[featureIndex])
                    : f.getStringValue();
            inst.setValue(attIndex, attValue);
        }
    }

    /*
     * Here, we assume that if either the labels FeatureVector is empty of features, or is null,
     * then this example is to be considered unlabeled.
     */
    if (exampleLabels.length == 0) {
        inst.setClassMissing();
    } else if (exampleLabels.length > 1) {
        System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, ");
        new Exception().printStackTrace();
        System.exit(1);
    } else {
        Feature label = labelLexicon.lookupKey(exampleLabels[0]);

        // make sure the name of the label feature matches the name of the 0'th
        // attribute
        if (!(label.getStringIdentifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) {
            System.err.println("WekaWrapper: Error - makeInstance found the wrong label name.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!label.isDiscrete())
            inst.setValue(0, labelValues[0]);
        else
            inst.setValue(0, label.getStringValue());
    }

    return inst;
}
From source file:edu.illinois.cs.cogcomp.saul.learn.SaulWekaWrapper.java
License:Open Source License
/**
 * Creates a WEKA Instance object out of a {@link FeatureVector}.
 **/
private Instance makeInstance(LBJavaInstance instance) {
    // Initialize an Instance object
    Instance inst = new Instance(attributeInfo.size());

    // Acknowledge that this instance will be a member of our dataset 'wekaInstances'
    inst.setDataset(wekaInstances);

    // set all nominal feature values to 0, which means those features are not used in this example
    for (int i = 1; i < attributeInfo.size(); i++)
        if (inst.attribute(i).isNominal())
            inst.setValue(i, "0");

    // Assign values for its attributes
    /*
     * Since we are iterating through this example's feature list, which does not contain the
     * label feature (the label feature is the first in the 'attribute' list), we set attIndex
     * to at exampleFeatures[featureIndices] + 1, while we start featureIndices at 0.
     */
    for (int featureIndex = 0; featureIndex < instance.featureIndices.length; ++featureIndex) {
        int attIndex = instance.featureIndices[featureIndex] + 1;
        Feature f = lexicon.lookupKey(instance.featureIndices[featureIndex]);
        // if the feature does not exist, do nothing. this may occur in test set.
        if (f == null)
            continue;
        Attribute att = (Attribute) attributeInfo.elementAt(attIndex);

        // make sure the feature and the attribute match
        if (!(att.name().equals(f.toString()))) {
            System.err.println(
                    "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair.");
            System.err.println(" " + att.name() + " and " + f.toString() + " should have been identical.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (f.isDiscrete())
            inst.setValue(attIndex, "1"); // this feature is used in this example so we set it to "1"
        else
            inst.setValue(attIndex, instance.featureValues[featureIndex]);
    }

    /*
     * Here, we assume that if either the labels FeatureVector is empty of features, or is null,
     * then this example is to be considered unlabeled.
     */
    if (instance.labelIndices.length == 0) {
        inst.setClassMissing();
    } else if (instance.labelIndices.length > 1) {
        System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, ");
        new Exception().printStackTrace();
        System.exit(1);
    } else {
        Feature label = labelLexicon.lookupKey(instance.labelIndices[0]);

        // make sure the label feature matches the 0'th attribute
        if (!(label.getGeneratingClassifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) {
            System.err.println("WekaWrapper: Error - makeInstance found the wrong label name.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!label.isDiscrete())
            inst.setValue(0, instance.labelValues[0]);
        else
            inst.setValue(0, label.getStringValue());
    }

    return inst;
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
public static Instances transformInstances(final Instances src, final CoordinateTransform transform) {
    final ArrayList<Attribute> out_attributes = new ArrayList<Attribute>();
    for (int i = 0; i < transform.outDimension(); ++i) {
        out_attributes.add(new Attribute("x" + i));
    }
    out_attributes.add((Attribute) src.classAttribute().copy());
    final Instances out = new Instances(src.relationName() + "_" + transform.name(), out_attributes, 0);
    for (int i = 0; i < src.size(); ++i) {
        final Instance inst = src.get(i);
        final RealVector flat = new ArrayRealVector(WekaUtil.unlabeledFeatures(inst));
        final RealVector transformed_vector = transform.encode(flat).x;
        final double[] transformed = new double[transformed_vector.getDimension() + 1];
        for (int j = 0; j < transformed_vector.getDimension(); ++j) {
            transformed[j] = transformed_vector.getEntry(j);
        }
        transformed[transformed.length - 1] = inst.classValue();
        final Instance transformed_instance = new DenseInstance(inst.weight(), transformed);
        out.add(transformed_instance);
        transformed_instance.setDataset(out);
    }
    out.setClassIndex(out.numAttributes() - 1);
    return out;
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
/**
 * @param args
 * @throws IOException
 * @throws FileNotFoundException
 */
public static void main(final String[] args) throws FileNotFoundException, IOException {
    final String experiment_file = args[0];
    final File root_directory;
    if (args.length > 1) {
        root_directory = new File(args[1]);
    } else {
        root_directory = new File(".");
    }
    final CsvConfigurationParser csv_config = new CsvConfigurationParser(new FileReader(experiment_file));
    final String experiment_name = FilenameUtils.getBaseName(experiment_file);
    final File expr_directory = new File(root_directory, experiment_name);
    expr_directory.mkdirs();

    final Csv.Writer csv = new Csv.Writer(
            new PrintStream(new FileOutputStream(new File(expr_directory, "results.csv"))));
    final String[] parameter_headers = new String[] { "kpca.kernel", "kpca.rbf.sigma",
            "kpca.random_forest.Ntrees", "kpca.random_forest.max_depth", "kpca.Nbases", "multiclass.classifier",
            "multiclass.random_forest.Ntrees", "multiclass.random_forest.max_depth",
            "pairwise_classifier.max_branching", "training.label_noise" };
    csv.cell("domain").cell("abstraction");
    for (final String p : parameter_headers) {
        csv.cell(p);
    }
    csv.cell("Ntrain").cell("Ntest").cell("ami.mean").cell("ami.variance").cell("ami.confidence").newline();

    for (int expr = 0; expr < csv_config.size(); ++expr) {
        try {
            final KeyValueStore expr_config = csv_config.get(expr);
            final Configuration config = new Configuration(root_directory.getPath(), expr_directory.getName(),
                    expr_config);

            System.out.println("[Loading '" + config.training_data_single + "']");
            final Instances single = WekaUtil
                    .readLabeledDataset(new File(root_directory, config.training_data_single + ".arff"));

            final Instances train = new Instances(single, 0);
            final int[] idx = Fn.range(0, single.size());
            int instance_counter = 0;
            Fn.shuffle(config.rng, idx);
            final int Ntrain = config.getInt("Ntrain_games"); // TODO: Rename?
            final double label_noise = config.getDouble("training.label_noise");
            final int Nlabels = train.classAttribute().numValues();
            assert (Nlabels > 0);
            for (int i = 0; i < Ntrain; ++i) {
                final Instance inst = single.get(idx[instance_counter++]);
                if (label_noise > 0 && config.rng.nextDouble() < label_noise) {
                    int noisy_label = 0;
                    do {
                        noisy_label = config.rng.nextInt(Nlabels);
                    } while (noisy_label == (int) inst.classValue());
                    System.out.println("Noisy label (" + inst.classValue() + " -> " + noisy_label + ")");
                    inst.setClassValue(noisy_label);
                }
                train.add(inst);
                inst.setDataset(train);
            }

            final Fn.Function2<Boolean, Instance, Instance> plausible_p = createPlausiblePredicate(config);

            final int Ntest = config.Ntest_games;
            int Ntest_added = 0;
            final ArrayList<Instances> tests = new ArrayList<Instances>();
            while (instance_counter < single.size() && Ntest_added < Ntest) {
                final Instance inst = single.get(idx[instance_counter++]);
                boolean found = false;
                for (final Instances test : tests) {
                    // Note that 'plausible_p' should be transitive
                    if (plausible_p.apply(inst, test.get(0))) {
                        WekaUtil.addInstance(test, inst);
                        if (test.size() == 30) {
                            Ntest_added += test.size();
                        } else if (test.size() > 30) {
                            Ntest_added += 1;
                        }
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    final Instances test = new Instances(single, 0);
                    WekaUtil.addInstance(test, inst);
                    tests.add(test);
                }
            }
            final Iterator<Instances> test_itr = tests.iterator();
            while (test_itr.hasNext()) {
                if (test_itr.next().size() < 30) {
                    test_itr.remove();
                }
            }
            System.out.println("=== tests.size() = " + tests.size());
            System.out.println("=== Ntest_added = " + Ntest_added);

            System.out.println("[Training]");
            final Evaluator evaluator = createEvaluator(config, train);
            // final Instances transformed_test = evaluator.prepareInstances( test );

            System.out.println("[Evaluating]");
            final int Nxval = evaluator.isSensitiveToOrdering() ? 10 : 1;
            final MeanVarianceAccumulator ami = new MeanVarianceAccumulator();

            final MeanVarianceAccumulator errors = new MeanVarianceAccumulator();
            final MeanVarianceAccumulator relative_error = new MeanVarianceAccumulator();
            int c = 0;
            for (int xval = 0; xval < Nxval; ++xval) {
                for (final Instances test : tests) {
                    // TODO: Debugging
                    WekaUtil.writeDataset(new File(config.root_directory), "test_" + (c++), test);

                    // transformed_test.randomize( new RandomAdaptor( config.rng ) );
                    // final ClusterContingencyTable ct = evaluator.evaluate( transformed_test );

                    test.randomize(new RandomAdaptor(config.rng));
                    final ClusterContingencyTable ct = evaluator.evaluate(test);
                    System.out.println(ct);

                    int Nerrors = 0;
                    final MeanVarianceAccumulator mv = new MeanVarianceAccumulator();
                    for (int i = 0; i < ct.R; ++i) {
                        final int max = Fn.max(ct.n[i]);
                        Nerrors += (ct.a[i] - max);
                        mv.add(((double) ct.a[i]) / ct.N * Nerrors / ct.a[i]);
                    }
                    errors.add(Nerrors);
                    relative_error.add(mv.mean());

                    System.out.println("exemplar: " + test.get(0));
                    System.out.println("Nerrors = " + Nerrors);
                    final PrintStream ct_out = new PrintStream(
                            new FileOutputStream(new File(expr_directory, "ct_" + expr + "_" + xval + ".csv")));
                    ct.writeCsv(ct_out);
                    ct_out.close();
                    final double ct_ami = ct.adjustedMutualInformation_max();
                    if (Double.isNaN(ct_ami)) {
                        System.out.println("! ct_ami = NaN");
                    } else {
                        ami.add(ct_ami);
                    }
                    System.out.println();
                }
            }
            System.out.println("errors = " + errors.mean() + " (" + errors.confidence() + ")");
            System.out.println(
                    "relative_error = " + relative_error.mean() + " (" + relative_error.confidence() + ")");
            System.out.println("AMI_max = " + ami.mean() + " (" + ami.confidence() + ")");

            csv.cell(config.domain).cell(config.get("abstraction.discovery"));
            for (final String p : parameter_headers) {
                csv.cell(config.get(p));
            }
            csv.cell(Ntrain).cell(Ntest).cell(ami.mean()).cell(ami.variance()).cell(ami.confidence()).newline();
        } catch (final Exception ex) {
            ex.printStackTrace();
        }
    }
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.MulticlassRepresenter.java
License:Open Source License
@Override
public Representation<S> encode(final FactoredRepresentation<S> x) {
    try {
        final Instance i = WekaUtil.labeledInstanceFromUnlabeledFeatures(headers_, x.phi());
        headers_.add(i);
        i.setDataset(headers_);
        final int c = (int) classifier_.classifyInstance(i);
        headers_.remove(0);
        return new ClusterAbstraction<S>(c);
    } catch (final RuntimeException ex) {
        throw ex;
    } catch (final Exception ex) {
        throw new RuntimeException(ex);
    }
}