List of usage examples for weka.core Instances setClassIndex
public void setClassIndex(int classIndex)
From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java
License:Open Source License
/** * Constructs one positive pair and one negative pair involving each * data point in 'single'.// w ww .j ava 2s . c o m * @param rng * @param max_pairwise_instances * @param single * @param combiner * @return */ public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> PairDataset makeBalancedPairDataset( final RandomGenerator rng, final int negative_per_instance, final int positive_per_instance, final Instances single, final InstanceCombiner combiner) { final int Nnegative = negative_per_instance * single.size(); final int Npositive = positive_per_instance * single.size(); // final int max_pairwise = config.getInt( "training.max_pairwise" ); final ReservoirSampleAccumulator<Pair<Instance, int[]>> negative = new ReservoirSampleAccumulator<Pair<Instance, int[]>>( rng, Nnegative); final ReservoirSampleAccumulator<Pair<Instance, int[]>> positive = new ReservoirSampleAccumulator<Pair<Instance, int[]>>( rng, Npositive); for (int i = 0; i < single.size(); ++i) { // if( i % 100 == 0 ) { // System.out.println( "i = " + i ); // } for (int j = i + 1; j < single.size(); ++j) { final Instance ii = single.get(i); final Instance ij = single.get(j); final int label; if (ii.classValue() == ij.classValue()) { label = 1; if (positive.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); positive.addPending(Pair.makePair(pair_instance, new int[] { i, j })); } } else { label = 0; if (negative.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); negative.addPending(Pair.makePair(pair_instance, new int[] { i, j })); } } } } final int N = Math.min(negative.samples().size(), positive.samples().size()); final String dataset_name = "train_" + combiner.keyword() + "_" + Nnegative + "x" + Npositive; final Instances x = new Instances(dataset_name, combiner.attributes(), Nnegative + Npositive); x.setClassIndex(x.numAttributes() - 1); final ArrayList<int[]> matches = new ArrayList<int[]>(); for (final 
Pair<Instance, int[]> ineg : negative.samples()) { WekaUtil.addInstance(x, ineg.first); matches.add(ineg.second); } for (final Pair<Instance, int[]> ipos : positive.samples()) { WekaUtil.addInstance(x, ipos.first); matches.add(ipos.second); } return new PairDataset(x, matches, combiner); }
From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java
License:Open Source License
/** * Constructs one positive pair and one negative pair involving each * data point in 'single'./*from w ww. j a v a 2 s .c o m*/ * @param rng * @param max_pairwise_instances * @param single * @param combiner * @return */ public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> PairDataset makePlausiblePairDataset( final RandomGenerator rng, final int negative_per_instance, final int positive_per_instance, final Instances single, final InstanceCombiner combiner, final Fn.Function2<Boolean, Instance, Instance> plausible_p) { final int Nnegative = negative_per_instance * single.size(); final int Npositive = positive_per_instance * single.size(); // final int max_pairwise = config.getInt( "training.max_pairwise" ); final ReservoirSampleAccumulator<Pair<Instance, int[]>> negative = new ReservoirSampleAccumulator<Pair<Instance, int[]>>( rng, Nnegative); final ReservoirSampleAccumulator<Pair<Instance, int[]>> positive = new ReservoirSampleAccumulator<Pair<Instance, int[]>>( rng, Npositive); for (int i = 0; i < single.size(); ++i) { // if( i % 100 == 0 ) { // System.out.println( "i = " + i ); // } for (int j = i + 1; j < single.size(); ++j) { final Instance ii = single.get(i); final Instance ij = single.get(j); if (!plausible_p.apply(ii, ij)) { // System.out.println( "Not plausible: " + ii + " != " + ij ); continue; } // System.out.println( "! 
Plausible: " + ii + " == " + ij ); final int label; if (ii.classValue() == ij.classValue()) { label = 1; if (positive.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); positive.addPending(Pair.makePair(pair_instance, new int[] { i, j })); } } else { label = 0; if (negative.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); negative.addPending(Pair.makePair(pair_instance, new int[] { i, j })); } } } } final int N = Math.min(negative.samples().size(), positive.samples().size()); final String dataset_name = "train_" + combiner.keyword() + "_" + negative.samples().size() + "x" + positive.samples().size(); final Instances x = new Instances(dataset_name, combiner.attributes(), Nnegative + Npositive); x.setClassIndex(x.numAttributes() - 1); final ArrayList<int[]> matches = new ArrayList<int[]>(); for (final Pair<Instance, int[]> ineg : negative.samples()) { WekaUtil.addInstance(x, ineg.first); matches.add(ineg.second); } for (final Pair<Instance, int[]> ipos : positive.samples()) { WekaUtil.addInstance(x, ipos.first); matches.add(ipos.second); } return new PairDataset(x, matches, combiner); }
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/**
 * Creates a dataset with the given name and attributes and no instances,
 * using the last attribute in the list as the class attribute.
 *
 * @param name relation name of the new dataset
 * @param attributes attribute list handed to the Instances constructor
 * @return the new dataset, created with an initial capacity of 0
 */
public static Instances createEmptyInstances(final String name, final ArrayList<Attribute> attributes) {
    final int initial_capacity = 0;
    final Instances empty = new Instances(name, attributes, initial_capacity);
    final int last_index = attributes.size() - 1;
    empty.setClassIndex(last_index);
    return empty;
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/**
 * Wraps a single feature vector in a one-row dataset named "__eval__".
 *
 * A copy of {@code attributes} is extended with a binary nominal attribute
 * called "__dummy_label__", which becomes the class attribute. The feature
 * array is extended by one trailing slot for that label.
 *
 * @param attributes attributes describing {@code features}; not modified
 * @param features the feature values; not modified
 * @return a dataset holding the one instance, with the dummy label as class
 */
public static Instances createSingletonInstances(final List<Attribute> attributes, final double[] features) {
    // Work on a copy of the schema so the caller's list is left alone.
    final ArrayList<Attribute> schema = new ArrayList<Attribute>(attributes);
    schema.add(createBinaryNominalAttribute("__dummy_label__"));

    // One extra slot at the end for the dummy label.
    // Presumably memcpy copies only the first features.length values, leaving that slot at 0.0.
    final double[] padded = new double[features.length + 1];
    Fn.memcpy(padded, features, features.length);

    final Instances x = new Instances("__eval__", schema, 1);
    final int class_index = schema.size() - 1;
    x.setClassIndex(class_index);

    final Instance row = new DenseInstance(1.0, padded);
    x.add(row);
    row.setDataset(x);
    return x;
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/** * Load an ARFF dataset.// w w w . ja va 2s .co m * * Adapted from: * http://weka.wikispaces.com/Use+WEKA+in+your+Java+code * @param file * @return */ public static Instances readLabeledDataset(final File file) { try { final DataSource source = new DataSource(file.getPath()); final Instances data = source.getDataSet(); // setting class attribute if the data format does not provide this information // For example, the XRFF format saves the class attribute information as well if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); } return data; } catch (final Exception ex) { throw new RuntimeException(ex); } }
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
public static Instances powerSet(final Instances D, final int n) { final Attribute class_attr = D.classAttribute(); final ImmutableSet.Builder<Integer> b = new ImmutableSet.Builder<Integer>(); final int Nattr = class_attr != null ? D.numAttributes() - 1 : D.numAttributes(); for (final int i : Fn.range(1, Nattr)) { b.add(i);// ww w . ja v a 2 s .c o m } final Set<Set<Integer>> index = Sets.powerSet(b.build()); final ArrayList<Attribute> attributes = new ArrayList<Attribute>(); for (final Set<Integer> subset : index) { if (subset.isEmpty() || subset.size() > n) { continue; } final StringBuilder attr_name = new StringBuilder(); int count = 0; for (final Integer i : subset) { if (count++ > 0) { attr_name.append("_x_"); } attr_name.append(D.attribute(i).name()); } attributes.add(new Attribute(attr_name.toString())); } if (class_attr != null) { assert (class_attr.isNominal()); attributes.add(WekaUtil.createNominalAttribute(class_attr.name(), class_attr.numValues())); } final String Pname = "P" + n + "_" + D.relationName(); final Instances P = new Instances(Pname, attributes, 0); if (class_attr != null) { P.setClassIndex(attributes.size() - 1); } for (final Instance inst : D) { final double[] xp = new double[attributes.size()]; int idx = 0; for (final Set<Integer> subset : index) { if (subset.isEmpty() || subset.size() > n) { continue; } double p = 1.0; for (final Integer i : subset) { p *= inst.value(i); } xp[idx++] = p; } if (class_attr != null) { xp[idx++] = inst.classValue(); } WekaUtil.addInstance(P, new DenseInstance(inst.weight(), xp)); } return P; }
From source file:edu.oregonstate.eecs.mcplan.domains.blackjack.AbstractionDiscovery.java
License:Open Source License
private static <X extends FactoredRepresentation<BlackjackState>> Instances makeTrainingSet( final SolvedStateAccumulator<X> acc, final ArrayList<Attribute> attributes, final int iter) { final int[] num_instances = new int[2]; final ArrayList<Instance> negative = new ArrayList<Instance>(); final ArrayList<Instance> positive = new ArrayList<Instance>(); final ArrayList<String> nominal = new ArrayList<String>(); nominal.add("0"); nominal.add("1"); attributes.add(new Attribute("__label__", nominal)); final int d = attributes.size() - 1; // Minus 1 for label for (int i = 0; i < acc.Phi_.size(); ++i) { final double[] phi_i = acc.Phi_.get(i).toArray(); for (int j = i + 1; j < acc.Phi_.size(); ++j) { final double[] phi_j = acc.Phi_.get(j).toArray(); final double[] phi_labeled = new double[d + 1]; for (int k = 0; k < d; ++k) { phi_labeled[k] = Math.abs(phi_i[k] - phi_j[k]); }/* w w w .ja v a2s . co m*/ final int label; if (acc.actions_.get(i).equals(acc.actions_.get(j))) { label = 1; } else { label = 0; } final double weight = 1.0; // TODO: Weights? final String label_string = Integer.toString(label); phi_labeled[d] = label; //attributes.get( label_index ).indexOfValue( label_string ); num_instances[label] += 1; final Instance instance = new DenseInstance(weight, phi_labeled); if (label == 0) { negative.add(instance); } else { positive.add(instance); } } } System.out.println("num_instances = " + Arrays.toString(num_instances)); final Instances x = new Instances("train" + iter, attributes, negative.size() + positive.size()); x.setClassIndex(d); x.addAll(negative); x.addAll(positive); return x; }
From source file:edu.oregonstate.eecs.mcplan.ml.GameTreeStateSimilarityDataset.java
License:Open Source License
/**
 * Builds a pairwise state-similarity dataset from the game tree and stores
 * it in {@code xs_}.
 *
 * The tree is walked with a Visitor. State nodes whose action-path key has
 * exactly one element are collected into a single list stored under the
 * {@code null} key. For every unordered pair of those states that both have
 * at least {@code min_samples_} samples, an instance is created whose
 * features are the element-wise absolute difference of the two feature
 * vectors and whose label and weight come from {@code label(...)}. At most
 * {@code max_instances_} instances per label are retained. Progress and
 * diagnostics are printed to stdout.
 */
@Override
public void run() {
    System.out.println("*** Extracting state nodes");
    final Visitor visitor = new Visitor();
    tree_.root().accept(visitor);
    // This extracts only the level-1 nodes.
    // TODO: Do this somewhere better.
    final HashMap<List<ActionNode<S, A>>, List<StateNode<S, A>>> tx = new HashMap<List<ActionNode<S, A>>, List<StateNode<S, A>>>();
    final ArrayList<StateNode<S, A>> depth_1 = new ArrayList<StateNode<S, A>>();
    for (final Map.Entry<List<ActionNode<S, A>>, List<StateNode<S, A>>> e : visitor.xs.entrySet()) {
        // Keep only entries whose key is a path of exactly one action.
        if (e.getKey() == null || e.getKey().size() != 1) {
            continue;
        } else {
            depth_1.addAll(e.getValue());
        }
    }
    // All collected states are pooled under the null key, so tx has exactly one entry.
    tx.put(null, depth_1);
    // Orders instances by ascending weight: the sign of the weight difference.
    final Comparator<Instance> weight_comp = new Comparator<Instance>() {
        @Override
        public int compare(final Instance a, final Instance b) {
            return (int) Math.signum(a.weight() - b.weight());
        }
    };
    // One slot more than the limit, because an instance is added before the
    // lowest-weight one is evicted (see the poll() calls below).
    final int max_cap = max_instances_ + 1;
    final PriorityQueue<Instance> positive = new PriorityQueue<Instance>(max_cap, weight_comp);
    final PriorityQueue<Instance> negative = new PriorityQueue<Instance>(max_cap, weight_comp);
    System.out.println("*** Building Instances");
    // NOTE(review): the two queues are created outside this loop and never
    // cleared; that is only harmless because tx holds a single entry.
    for (final Map.Entry<List<ActionNode<S, A>>, List<StateNode<S, A>>> e : tx.entrySet()) {
        System.out.println("***** key = " + e.getKey() + ", value.size() = " + e.getValue().size());
        final String name = (e.getKey() != null ? e.getKey().toString() : "null");
        final List<StateNode<S, A>> values = e.getValue();
        // Number of pairs seen per label (index 0 = label 0, index 1 = label 1),
        // counted before any eviction from the queues.
        final int[] num_instances = { 0, 0 };
        int count = 0;
        for (int i = 0; i < values.size(); ++i) {
            for (int j = i + 1; j < values.size(); ++j) {
                // Progress output every 100 pairs, including skipped ones.
                if (count++ % 100 == 0) {
                    System.out.println("***** instance " + (count - 1));
                }
                final StateNode<S, A> s_i = values.get(i);
                final StateNode<S, A> s_j = values.get(j);
                if (s_i.n() < min_samples_ || s_j.n() < min_samples_) {
                    System.out.println("! skipping under-sampled state pair");
                    continue;
                }
                final double[] phi_i = s_i.token.phi();
                final double[] phi_j = s_j.token.phi();
                assert (phi_i.length == phi_j.length);
                // Print the mismatch details before the assert below fires,
                // so they are visible even when assertions are disabled.
                if (phi_i.length != attributes_.size() - 1) {
                    System.out.println("! phi_i.length = " + phi_i.length);
                    System.out.println("! attributes_.size() = " + attributes_.size());
                }
                assert (phi_i.length == attributes_.size() - 1);
                // Feature vector is absolute difference of the two state
                // feature vectors.
                final double[] phi_labeled = new double[phi_i.length + 1];
                for (int k = 0; k < phi_i.length; ++k) {
                    phi_labeled[k] = Math.abs(phi_i[k] - phi_j[k]);
                }
                final Tuple2<Integer, Double> labeled = label(e.getKey(), player_, s_i, s_j);
                final int label = labeled._1;
                final double weight = labeled._2;
                // NOTE(review): label_string is never read; only the commented-out code below used it.
                final String label_string = Integer.toString(label);
                // NOTE(review): label_index is defined outside this method;
                // presumably it equals phi_i.length (the last slot) - confirm.
                phi_labeled[label_index] = label; //attributes.get( label_index ).indexOfValue( label_string );
                num_instances[label] += 1;
                final Instance instance = new DenseInstance(weight, phi_labeled);
                // Bounded selection: once a queue reaches max_cap elements,
                // poll() removes its head, i.e. the lowest-weight instance,
                // leaving at most max_instances_ per label.
                if (label == 0) {
                    negative.add(instance);
                    if (negative.size() >= max_cap) {
                        negative.poll();
                    }
                } else {
                    positive.add(instance);
                    if (positive.size() >= max_cap) {
                        positive.poll();
                    }
                }
            } // for j
        } // for i
        System.out.println("num_instances = " + Arrays.toString(num_instances));
        final Instances x = new Instances(name, attributes_, negative.size() + positive.size());
        x.setClassIndex(label_index);
        // Negatives first, then positives, each in the queue's iteration
        // order (which is not sorted by weight).
        x.addAll(negative);
        x.addAll(positive);
        xs_.put(e.getKey(), x);
    }
}
From source file:edu.uga.cs.fluxbuster.classification.Classifier.java
License:Open Source License
/**
 * Executes the classifier.
 *
 * Each instance parsed from {@code prepfeatures} is classified with the
 * serialized J48 model, and the cluster at the same position in
 * {@code clusters} is filed under the predicted class. If anything fails,
 * the error is logged and the clusters classified up to that point are
 * returned.
 *
 * @param prepfeatures the prepared features in arff format
 * @param modelfile the path to the serialized model
 * @param clusters the clusters to classify, in the same order as the
 *        instances in {@code prepfeatures}
 * @return a map of the classified clusters; the keys are the classes and
 *         the values are lists of the clusters belonging to those classes
 */
private Map<ClusterClass, List<StoredDomainCluster>> executeClassifier(String prepfeatures, String modelfile,
        List<StoredDomainCluster> clusters) {
    Map<ClusterClass, List<StoredDomainCluster>> retval = new HashMap<ClusterClass, List<StoredDomainCluster>>();
    try {
        // NOTE(review): getBytes() encodes with the platform default charset.
        // Left unchanged on purpose: the charset the loader decodes with is
        // not visible here, and changing only one side could corrupt
        // non-ASCII input.
        DataSource source = new DataSource(new ByteArrayInputStream(prepfeatures.getBytes()));
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        String[] options = weka.core.Utils.splitOptions("-p 0");
        J48 cls = (J48) weka.core.SerializationHelper.read(modelfile);
        cls.setOptions(options);
        for (int i = 0; i < data.numInstances(); i++) {
            double pred = cls.classifyInstance(data.instance(i));
            String className = data.classAttribute().value((int) pred);
            // Upper-case with a fixed locale: with the default locale the
            // result depends on the machine (e.g. Turkish maps 'i' to a
            // dotted capital), which would make valueOf() fail.
            ClusterClass clusClass = ClusterClass.valueOf(className.toUpperCase(java.util.Locale.ROOT));
            if (!retval.containsKey(clusClass)) {
                retval.put(clusClass, new ArrayList<StoredDomainCluster>());
            }
            retval.get(clusClass).add(clusters.get(i));
        }
    } catch (Exception e) {
        // Best effort: report the failure and return what has been classified so far.
        if (log.isErrorEnabled()) {
            log.error("Error executing classifier.", e);
        }
    }
    return retval;
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Converts the instances in the given dataset to binary, setting the specified labels to positive. * Note this method is destructive to data, directly modifying its contents. * @param data the multiclass dataset to be converted to binary. * @param positiveClassValue the class value to treat as positive. *//*from w ww . jav a2 s . com*/ public static void convertMulticlassToBinary(Instances data, String positiveClassValue) { // ensure that data is nominal if (!data.classAttribute().isNominal()) throw new IllegalArgumentException("Instances must have a nominal class."); // create the new class attribute FastVector newClasses = new FastVector(2); newClasses.addElement("Y"); newClasses.addElement("N"); Attribute newClassAttribute = new Attribute("class", newClasses); // alter the class attribute to be binary int newClassAttIdx = data.classIndex(); data.insertAttributeAt(newClassAttribute, newClassAttIdx); int classAttIdx = data.classIndex(); // set the instances classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively) int numInstances = data.numInstances(); for (int instIdx = 0; instIdx < numInstances; instIdx++) { Instance inst = data.instance(instIdx); if (inst.stringValue(classAttIdx).equals(positiveClassValue)) { inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y } else { inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be 0 } } // switch the class index to the new class and delete the old class data.setClassIndex(newClassAttIdx); data.deleteAttributeAt(classAttIdx); // alter the dataset name data.setRelationName(data.relationName() + "-" + positiveClassValue); }