Example usage for weka.core Instances setClassIndex

Introduction

On this page you can find example usages of weka.core.Instances.setClassIndex.

Prototype

public void setClassIndex(int classIndex)

Source Link

Document

Sets the class index of the set.

Usage

From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java

License:Open Source License

/**
 * Builds a pairwise training set from the labeled instances in 'single'.
 *
 * Every unordered pair (i, j) with i < j is visited. A pair whose two
 * class values are equal gets label 1 and is offered to the positive
 * sampler; any other pair gets label 0 and is offered to the negative
 * sampler. A pair instance is only built by 'combiner' when the sampler
 * reports (via acceptNext()) that it will take the next element.
 *
 * NOTE(review): despite the method name, the two classes are not trimmed
 * to a common size here; every sample retained by either sampler is
 * written to the result.
 *
 * @param rng random generator handed to both samplers
 * @param negative_per_instance the negative sampler is created with
 *        size argument negative_per_instance * single.size()
 * @param positive_per_instance the positive sampler is created with
 *        size argument positive_per_instance * single.size()
 * @param single labeled instances to be paired up
 * @param combiner builds the pair instance and supplies the attribute
 *        list and the keyword used in the dataset name
 * @return a PairDataset holding the negative samples followed by the
 *         positive samples (last attribute is the class), the matching
 *         { i, j } index pairs in the same order, and the combiner
 */
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> PairDataset makeBalancedPairDataset(
        final RandomGenerator rng, final int negative_per_instance, final int positive_per_instance,
        final Instances single, final InstanceCombiner combiner) {
    final int Nnegative = negative_per_instance * single.size();
    final int Npositive = positive_per_instance * single.size();
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> negative = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Nnegative);
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> positive = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Npositive);

    for (int i = 0; i < single.size(); ++i) {
        // The first member of the pair does not change in the inner loop.
        final Instance ii = single.get(i);
        for (int j = i + 1; j < single.size(); ++j) {
            final Instance ij = single.get(j);
            final int label = (ii.classValue() == ij.classValue()) ? 1 : 0;
            final ReservoirSampleAccumulator<Pair<Instance, int[]>> sampler = (label == 1) ? positive : negative;
            // Only pay for building the pair instance if the sampler will keep it.
            if (sampler.acceptNext()) {
                final Instance pair_instance = combiner.apply(ii, ij, label);
                sampler.addPending(Pair.makePair(pair_instance, new int[] { i, j }));
            }
        }
    }

    // The name records the requested sizes, not the number actually sampled.
    final String dataset_name = "train_" + combiner.keyword() + "_" + Nnegative + "x" + Npositive;
    final Instances x = new Instances(dataset_name, combiner.attributes(), Nnegative + Npositive);
    x.setClassIndex(x.numAttributes() - 1);
    final ArrayList<int[]> matches = new ArrayList<int[]>();
    // Negatives first, then positives; 'matches' stays aligned with 'x'.
    for (final Pair<Instance, int[]> ineg : negative.samples()) {
        WekaUtil.addInstance(x, ineg.first);
        matches.add(ineg.second);
    }
    for (final Pair<Instance, int[]> ipos : positive.samples()) {
        WekaUtil.addInstance(x, ipos.first);
        matches.add(ipos.second);
    }

    return new PairDataset(x, matches, combiner);
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java

License:Open Source License

/**
 * Constructs one positive pair and one negative pair involving each
 * data point in 'single'./*from   w ww. j  a  v a  2 s  .c  o  m*/
 * @param rng
 * @param max_pairwise_instances
 * @param single
 * @param combiner
 * @return
 */
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> PairDataset makePlausiblePairDataset(
        final RandomGenerator rng, final int negative_per_instance, final int positive_per_instance,
        final Instances single, final InstanceCombiner combiner,
        final Fn.Function2<Boolean, Instance, Instance> plausible_p) {
    final int Nnegative = negative_per_instance * single.size();
    final int Npositive = positive_per_instance * single.size();
    //      final int max_pairwise = config.getInt( "training.max_pairwise" );
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> negative = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Nnegative);
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> positive = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Npositive);

    for (int i = 0; i < single.size(); ++i) {
        //         if( i % 100 == 0 ) {
        //            System.out.println( "i = " + i );
        //         }
        for (int j = i + 1; j < single.size(); ++j) {
            final Instance ii = single.get(i);
            final Instance ij = single.get(j);

            if (!plausible_p.apply(ii, ij)) {
                //               System.out.println( "Not plausible: " + ii + " != " + ij );
                continue;
            }

            //            System.out.println( "! Plausible: " + ii + " == " + ij );

            final int label;
            if (ii.classValue() == ij.classValue()) {
                label = 1;
                if (positive.acceptNext()) {
                    final Instance pair_instance = combiner.apply(ii, ij, label);
                    positive.addPending(Pair.makePair(pair_instance, new int[] { i, j }));
                }
            } else {
                label = 0;
                if (negative.acceptNext()) {
                    final Instance pair_instance = combiner.apply(ii, ij, label);
                    negative.addPending(Pair.makePair(pair_instance, new int[] { i, j }));
                }
            }
        }
    }

    final int N = Math.min(negative.samples().size(), positive.samples().size());
    final String dataset_name = "train_" + combiner.keyword() + "_" + negative.samples().size() + "x"
            + positive.samples().size();
    final Instances x = new Instances(dataset_name, combiner.attributes(), Nnegative + Npositive);
    x.setClassIndex(x.numAttributes() - 1);
    final ArrayList<int[]> matches = new ArrayList<int[]>();
    for (final Pair<Instance, int[]> ineg : negative.samples()) {
        WekaUtil.addInstance(x, ineg.first);
        matches.add(ineg.second);
    }
    for (final Pair<Instance, int[]> ipos : positive.samples()) {
        WekaUtil.addInstance(x, ipos.first);
        matches.add(ipos.second);
    }

    return new PairDataset(x, matches, combiner);
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Creates a dataset with the given relation name and attributes that
 * contains no instances, with the last attribute set as the class.
 * @param name relation name of the new dataset
 * @param attributes attribute list; its final entry becomes the class
 * @return the new, empty dataset
 */
public static Instances createEmptyInstances(final String name, final ArrayList<Attribute> attributes) {
    final int class_index = attributes.size() - 1;
    final Instances empty = new Instances(name, attributes, 0);
    empty.setClassIndex(class_index);
    return empty;
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Wraps a single feature vector in an Instances object named "__eval__"
 * whose attributes are the given ones plus a trailing binary nominal
 * "__dummy_label__" attribute, which is set as the class.
 * @param attributes attributes describing 'features'
 * @param features feature values; the array itself is not modified
 * @return a dataset to which the one padded instance has been added
 */
public static Instances createSingletonInstances(final List<Attribute> attributes, final double[] features) {
    // Caller's attributes followed by the placeholder label attribute.
    final ArrayList<Attribute> labeled_attributes = new ArrayList<Attribute>(attributes);
    labeled_attributes.add(createBinaryNominalAttribute("__dummy_label__"));
    final int label_index = labeled_attributes.size() - 1;

    // One extra trailing slot for the label; it is never written, so it
    // keeps the array's default value of 0.0.
    final double[] padded = new double[features.length + 1];
    Fn.memcpy(padded, features, features.length);

    final Instances singleton = new Instances("__eval__", labeled_attributes, 1);
    singleton.setClassIndex(label_index);

    final Instance row = new DenseInstance(1.0, padded);
    singleton.add(row);
    row.setDataset(singleton);
    return singleton;
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Loads a dataset from 'file' and makes sure it has a class attribute.
 *
 * If the loaded data reports a class index of -1, the last attribute is
 * selected as the class; a class index supplied by the data format
 * (XRFF, for example, stores it) is left untouched.
 *
 * Adapted from:
 * http://weka.wikispaces.com/Use+WEKA+in+your+Java+code
 * @param file the dataset file; its path is handed to a DataSource
 * @return the loaded dataset
 * @throws RuntimeException wrapping any exception raised while loading
 */
public static Instances readLabeledDataset(final File file) {
    try {
        final Instances dataset = new DataSource(file.getPath()).getDataSet();
        final boolean class_unset = (dataset.classIndex() == -1);
        if (class_unset) {
            // Fall back to the last attribute as the class.
            dataset.setClassIndex(dataset.numAttributes() - 1);
        }
        return dataset;
    } catch (final Exception cause) {
        throw new RuntimeException(cause);
    }
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Builds a dataset of product features from 'D'.
 *
 * For every non-empty subset of at most 'n' attribute indices (taken
 * from the power set of the indices produced by Fn.range(1, Nattr)),
 * one attribute is created whose name joins the member attribute names
 * with "_x_" and whose value, per instance, is the product of the
 * member attribute values. If 'D' has a class attribute, a nominal
 * attribute with the same name and number of values is appended and set
 * as the class, and each instance's class value is copied over.
 *
 * NOTE(review): which indices Fn.range(1, Nattr) yields (in particular
 * whether attribute 0 is included) depends on Fn.range - confirm. The
 * computation of Nattr also presumes the class attribute, if any, is
 * the last attribute.
 *
 * @param D the source dataset; it is not modified
 * @param n maximum subset size to turn into a product feature
 * @return a new dataset named "P" + n + "_" + D's relation name
 */
public static Instances powerSet(final Instances D, final int n) {
    // Instances.classAttribute() throws when no class is set instead of
    // returning null, so check the class index before asking for it.
    final Attribute class_attr = D.classIndex() >= 0 ? D.classAttribute() : null;

    final ImmutableSet.Builder<Integer> b = new ImmutableSet.Builder<Integer>();
    final int Nattr = class_attr != null ? D.numAttributes() - 1 : D.numAttributes();
    for (final int i : Fn.range(1, Nattr)) {
        b.add(i);
    }
    final Set<Set<Integer>> index = Sets.powerSet(b.build());

    // One attribute per retained subset, named after its members.
    final ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (final Set<Integer> subset : index) {
        if (subset.isEmpty() || subset.size() > n) {
            continue;
        }

        final StringBuilder attr_name = new StringBuilder();
        int count = 0;
        for (final Integer i : subset) {
            if (count++ > 0) {
                attr_name.append("_x_");
            }
            attr_name.append(D.attribute(i).name());
        }

        attributes.add(new Attribute(attr_name.toString()));
    }
    if (class_attr != null) {
        assert (class_attr.isNominal());
        attributes.add(WekaUtil.createNominalAttribute(class_attr.name(), class_attr.numValues()));
    }

    final String Pname = "P" + n + "_" + D.relationName();
    final Instances P = new Instances(Pname, attributes, 0);
    if (class_attr != null) {
        P.setClassIndex(attributes.size() - 1);
    }

    for (final Instance inst : D) {
        final double[] xp = new double[attributes.size()];
        int idx = 0;
        // Same iteration and same filter as above, so xp[k] lines up
        // with attributes.get(k).
        for (final Set<Integer> subset : index) {
            if (subset.isEmpty() || subset.size() > n) {
                continue;
            }

            double p = 1.0;
            for (final Integer i : subset) {
                p *= inst.value(i);
            }
            xp[idx++] = p;
        }
        if (class_attr != null) {
            xp[idx++] = inst.classValue();
        }

        WekaUtil.addInstance(P, new DenseInstance(inst.weight(), xp));
    }

    return P;
}

From source file:edu.oregonstate.eecs.mcplan.domains.blackjack.AbstractionDiscovery.java

License:Open Source License

/**
 * Builds a pairwise training set from the feature vectors in acc.Phi_.
 *
 * For every unordered pair (i, j) with i < j, the feature vector is the
 * element-wise absolute difference of the two vectors, and the label is
 * 1 if acc.actions_.get(i) equals acc.actions_.get(j), otherwise 0.
 * Every instance has weight 1.0.
 *
 * Side effect: a nominal "__label__" attribute with values "0" and "1"
 * is appended to the caller's 'attributes' list.
 *
 * @param acc source of the feature vectors (Phi_) and actions (actions_)
 * @param attributes feature attributes; modified as described above
 * @param iter used only to name the dataset ("train" + iter)
 * @return dataset with all negative instances followed by all positive
 *         instances, with the appended label attribute as the class
 */
private static <X extends FactoredRepresentation<BlackjackState>> Instances makeTrainingSet(
        final SolvedStateAccumulator<X> acc, final ArrayList<Attribute> attributes, final int iter) {
    final int[] num_instances = new int[2];
    final ArrayList<Instance> negative = new ArrayList<Instance>();
    final ArrayList<Instance> positive = new ArrayList<Instance>();
    final ArrayList<String> nominal = new ArrayList<String>();
    nominal.add("0");
    nominal.add("1");
    attributes.add(new Attribute("__label__", nominal));
    final int d = attributes.size() - 1; // Minus 1 for label

    for (int i = 0; i < acc.Phi_.size(); ++i) {
        final double[] phi_i = acc.Phi_.get(i).toArray();
        for (int j = i + 1; j < acc.Phi_.size(); ++j) {
            final double[] phi_j = acc.Phi_.get(j).toArray();
            final double[] phi_labeled = new double[d + 1];
            for (int k = 0; k < d; ++k) {
                phi_labeled[k] = Math.abs(phi_i[k] - phi_j[k]);
            }
            final int label = acc.actions_.get(i).equals(acc.actions_.get(j)) ? 1 : 0;
            final double weight = 1.0; // TODO: Weights?
            // The label is stored directly as the numeric value 0 or 1,
            // which is also the index of "0" / "1" in 'nominal'.
            phi_labeled[d] = label;

            num_instances[label] += 1;

            final Instance instance = new DenseInstance(weight, phi_labeled);
            if (label == 0) {
                negative.add(instance);
            } else {
                positive.add(instance);
            }
        }
    }
    System.out.println("num_instances = " + Arrays.toString(num_instances));
    final Instances x = new Instances("train" + iter, attributes, negative.size() + positive.size());
    x.setClassIndex(d);
    x.addAll(negative);
    x.addAll(positive);

    return x;
}

From source file:edu.oregonstate.eecs.mcplan.ml.GameTreeStateSimilarityDataset.java

License:Open Source License

/**
 * Builds pairwise state-similarity datasets from the game tree and
 * stores them in xs_.
 *
 * State nodes are collected by a Visitor; only those whose key (a list
 * of action nodes) has size 1 are kept, grouped under a single null key.
 * For each pair of kept state nodes that both have at least
 * min_samples_ samples, an instance is created whose features are the
 * absolute differences of the two phi() vectors and whose label and
 * weight come from label(...). Instances are held in two bounded
 * priority queues (label 0 and label != 0) before being added to the
 * resulting Instances object.
 */
@Override
public void run() {
    System.out.println("*** Extracting state nodes");
    final Visitor visitor = new Visitor();
    tree_.root().accept(visitor);

    // This extracts only the level-1 nodes.
    // TODO: Do this somewhere better.
    final HashMap<List<ActionNode<S, A>>, List<StateNode<S, A>>> tx = new HashMap<List<ActionNode<S, A>>, List<StateNode<S, A>>>();
    final ArrayList<StateNode<S, A>> depth_1 = new ArrayList<StateNode<S, A>>();
    for (final Map.Entry<List<ActionNode<S, A>>, List<StateNode<S, A>>> e : visitor.xs.entrySet()) {
        // Keep only entries whose key holds exactly one action node.
        if (e.getKey() == null || e.getKey().size() != 1) {
            continue;
        } else {
            depth_1.addAll(e.getValue());
        }
    }
    // All retained nodes are pooled under the null key, so 'tx' has exactly one entry.
    tx.put(null, depth_1);

    // Orders instances by ascending weight, so the queue head is the lowest-weight instance.
    final Comparator<Instance> weight_comp = new Comparator<Instance>() {
        @Override
        public int compare(final Instance a, final Instance b) {
            return (int) Math.signum(a.weight() - b.weight());
        }
    };
    // One more than max_instances_: an element is polled as soon as a queue reaches this size.
    final int max_cap = max_instances_ + 1;
    // NOTE(review): both queues are created outside the loop over 'tx' and never cleared;
    // that is harmless while 'tx' has a single entry.
    final PriorityQueue<Instance> positive = new PriorityQueue<Instance>(max_cap, weight_comp);
    final PriorityQueue<Instance> negative = new PriorityQueue<Instance>(max_cap, weight_comp);
    System.out.println("*** Building Instances");
    for (final Map.Entry<List<ActionNode<S, A>>, List<StateNode<S, A>>> e : tx.entrySet()) {
        System.out.println("***** key = " + e.getKey() + ", value.size() = " + e.getValue().size());

        final String name = (e.getKey() != null ? e.getKey().toString() : "null");
        final List<StateNode<S, A>> values = e.getValue();
        // Per-label counts of generated instances, indexed by label (printed below).
        final int[] num_instances = { 0, 0 };
        int count = 0;
        for (int i = 0; i < values.size(); ++i) {
            for (int j = i + 1; j < values.size(); ++j) {
                // Progress output every 100 visited pairs (skipped pairs are counted too).
                if (count++ % 100 == 0) {
                    System.out.println("***** instance " + (count - 1));
                }

                final StateNode<S, A> s_i = values.get(i);
                final StateNode<S, A> s_j = values.get(j);
                if (s_i.n() < min_samples_ || s_j.n() < min_samples_) {
                    System.out.println("! skipping under-sampled state pair");
                    continue;
                }
                final double[] phi_i = s_i.token.phi();
                final double[] phi_j = s_j.token.phi();
                assert (phi_i.length == phi_j.length);
                // Diagnostic output before the assertion below fires:
                // attributes_ is expected to hold the features plus one label attribute.
                if (phi_i.length != attributes_.size() - 1) {
                    System.out.println("! phi_i.length = " + phi_i.length);
                    System.out.println("! attributes_.size() = " + attributes_.size());
                }
                assert (phi_i.length == attributes_.size() - 1);
                // Feature vector is absolute difference of the two state
                // feature vectors.
                final double[] phi_labeled = new double[phi_i.length + 1];
                for (int k = 0; k < phi_i.length; ++k) {
                    phi_labeled[k] = Math.abs(phi_i[k] - phi_j[k]);
                }
                // label(...) yields the pair's label (_1) and instance weight (_2).
                final Tuple2<Integer, Double> labeled = label(e.getKey(), player_, s_i, s_j);
                final int label = labeled._1;
                final double weight = labeled._2;
                // NOTE(review): label_string is never read.
                final String label_string = Integer.toString(label);
                phi_labeled[label_index] = label; //attributes.get( label_index ).indexOfValue( label_string );

                num_instances[label] += 1;

                final Instance instance = new DenseInstance(weight, phi_labeled);
                // Add first, then drop the lowest-weight instance once the queue reaches
                // max_cap, so each queue ends up with at most max_instances_ elements.
                if (label == 0) {
                    negative.add(instance);
                    if (negative.size() >= max_cap) {
                        negative.poll();
                    }
                } else {
                    positive.add(instance);
                    if (positive.size() >= max_cap) {
                        positive.poll();
                    }
                }
            } // for j
        } // for i
        System.out.println("num_instances = " + Arrays.toString(num_instances));
        final Instances x = new Instances(name, attributes_, negative.size() + positive.size());
        x.setClassIndex(label_index);
        // Negatives first, then positives, each in the queue's iteration order
        // (which java.util.PriorityQueue does not guarantee to be sorted).
        x.addAll(negative);
        x.addAll(positive);
        xs_.put(e.getKey(), x);
    }
}

From source file:edu.uga.cs.fluxbuster.classification.Classifier.java

License:Open Source License

/**
 * Executes the classifier.
 *
 * The prepared features are parsed as a dataset (the last attribute is
 * used as the class if none is set), the serialized J48 model is loaded
 * from 'modelfile', and every instance is classified. The predicted
 * class label is upper-cased and mapped to a ClusterClass constant; the
 * cluster at the same position in 'clusters' is filed under that class.
 *
 * Any exception raised along the way is logged and not rethrown, so the
 * returned map may be empty or only partially filled.
 *
 * @param prepfeatures the prepared features in arff format
 * @param modelfile the path to the serialized model
 * @param clusters the clusters to classify; element i is expected to
 *       correspond to instance i of the prepared features
 * @return a map of the classified clusters, the keys are the classes
 *       and the values are lists of the clusters belonging to those classes
 */
private Map<ClusterClass, List<StoredDomainCluster>> executeClassifier(String prepfeatures, String modelfile,
        List<StoredDomainCluster> clusters) {
    Map<ClusterClass, List<StoredDomainCluster>> retval = new HashMap<ClusterClass, List<StoredDomainCluster>>();
    try {
        DataSource source = new DataSource(new ByteArrayInputStream(prepfeatures.getBytes()));
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        String[] options = weka.core.Utils.splitOptions("-p 0");
        J48 cls = (J48) weka.core.SerializationHelper.read(modelfile);
        cls.setOptions(options);
        for (int i = 0; i < data.numInstances(); i++) {
            double pred = cls.classifyInstance(data.instance(i));
            // Locale.ROOT keeps the upper-casing independent of the default
            // locale (e.g. Turkish maps 'i' to a dotted capital I), so the
            // label always matches the enum constant name.
            String className = data.classAttribute().value((int) pred).toUpperCase(java.util.Locale.ROOT);
            ClusterClass clusClass = ClusterClass.valueOf(className);
            List<StoredDomainCluster> members = retval.get(clusClass);
            if (members == null) {
                members = new ArrayList<StoredDomainCluster>();
                retval.put(clusClass, members);
            }
            members.add(clusters.get(i));
        }
    } catch (Exception e) {
        // Deliberately best-effort: report the failure and return what was classified so far.
        if (log.isErrorEnabled()) {
            log.error("Error executing classifier.", e);
        }
    }
    return retval;
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/** Replaces the nominal class attribute of the given dataset with a binary one.
 * The new class attribute is named "class" and has the values "Y" and "N": an
 * instance gets "Y" if its original class label equals positiveClassValue and
 * "N" otherwise. The relation name gets "-" + positiveClassValue appended.
 * Note this method is destructive to data, directly modifying its contents.
 * @param data the multiclass dataset to be converted to binary.
 * @param positiveClassValue the class value to treat as positive.
 * @throws IllegalArgumentException if the class attribute is not nominal.
 */
public static void convertMulticlassToBinary(Instances data, String positiveClassValue) {

    // only nominal classes can be converted
    if (!data.classAttribute().isNominal()) {
        throw new IllegalArgumentException("Instances must have a nominal class.");
    }

    // binary label set: index 0 is "Y", index 1 is "N"
    FastVector binaryLabels = new FastVector(2);
    binaryLabels.addElement("Y");
    binaryLabels.addElement("N");
    Attribute binaryClass = new Attribute("class", binaryLabels);

    // put the binary attribute where the class currently sits; afterwards the
    // dataset's class index is re-read to locate the original class attribute
    final int binaryIdx = data.classIndex();
    data.insertAttributeAt(binaryClass, binaryIdx);
    final int originalIdx = data.classIndex();

    // fill in the binary label of every instance from its original label
    final int total = data.numInstances();
    for (int row = 0; row < total; row++) {
        Instance current = data.instance(row);
        boolean isPositive = current.stringValue(originalIdx).equals(positiveClassValue);
        // 0 selects the first label ("Y"), 1 the second label ("N")
        current.setValue(binaryIdx, isPositive ? 0 : 1);
    }

    // make the binary attribute the class, then drop the original class
    data.setClassIndex(binaryIdx);
    data.deleteAttributeAt(originalIdx);

    // record the positive class in the dataset name
    data.setRelationName(data.relationName() + "-" + positiveClassValue);
}