List of usage examples for weka.core.Instances.get
@Override
public Instance get(int index)
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
/**
 * Entry point: evaluates a similarity function for each experiment row in a CSV
 * configuration file, writing per-experiment contingency tables and a summary
 * "results.csv" into a directory named after the experiment file.
 *
 * @param args args[0] = experiment CSV file; args[1] (optional) = root directory, defaults to "."
 * @throws IOException
 * @throws FileNotFoundException
 */
public static void main(final String[] args) throws FileNotFoundException, IOException {
    final String experiment_file = args[0];
    final File root_directory;
    if (args.length > 1) {
        root_directory = new File(args[1]);
    } else {
        root_directory = new File(".");
    }
    final CsvConfigurationParser csv_config = new CsvConfigurationParser(new FileReader(experiment_file));
    final String experiment_name = FilenameUtils.getBaseName(experiment_file);
    final File expr_directory = new File(root_directory, experiment_name);
    expr_directory.mkdirs();
    final Csv.Writer csv = new Csv.Writer(
            new PrintStream(new FileOutputStream(new File(expr_directory, "results.csv"))));
    // Parameter columns echoed into results.csv for every experiment row.
    final String[] parameter_headers = new String[] { "kpca.kernel", "kpca.rbf.sigma",
            "kpca.random_forest.Ntrees", "kpca.random_forest.max_depth", "kpca.Nbases", "multiclass.classifier",
            "multiclass.random_forest.Ntrees", "multiclass.random_forest.max_depth",
            "pairwise_classifier.max_branching", "training.label_noise" };
    csv.cell("domain").cell("abstraction");
    for (final String p : parameter_headers) {
        csv.cell(p);
    }
    csv.cell("Ntrain").cell("Ntest").cell("ami.mean").cell("ami.variance").cell("ami.confidence").newline();
    for (int expr = 0; expr < csv_config.size(); ++expr) {
        try {
            final KeyValueStore expr_config = csv_config.get(expr);
            final Configuration config = new Configuration(root_directory.getPath(), expr_directory.getName(),
                    expr_config);
            System.out.println("[Loading '" + config.training_data_single + "']");
            final Instances single = WekaUtil
                    .readLabeledDataset(new File(root_directory, config.training_data_single + ".arff"));
            final Instances train = new Instances(single, 0);
            // Shuffled index into 'single'; consumed first for training, then for test selection.
            final int[] idx = Fn.range(0, single.size());
            int instance_counter = 0;
            Fn.shuffle(config.rng, idx);
            final int Ntrain = config.getInt("Ntrain_games"); // TODO: Rename?
            final double label_noise = config.getDouble("training.label_noise");
            final int Nlabels = train.classAttribute().numValues();
            assert (Nlabels > 0);
            // Build the training set, corrupting each label with probability 'label_noise'.
            for (int i = 0; i < Ntrain; ++i) {
                final Instance inst = single.get(idx[instance_counter++]);
                if (label_noise > 0 && config.rng.nextDouble() < label_noise) {
                    int noisy_label = 0;
                    // Re-draw until the noisy label differs from the current label.
                    do {
                        noisy_label = config.rng.nextInt(Nlabels);
                    } while (noisy_label == (int) inst.classValue());
                    System.out.println("Noisy label (" + inst.classValue() + " -> " + noisy_label + ")");
                    inst.setClassValue(noisy_label);
                }
                train.add(inst);
                inst.setDataset(train);
            }
            final Fn.Function2<Boolean, Instance, Instance> plausible_p = createPlausiblePredicate(config);
            final int Ntest = config.Ntest_games;
            int Ntest_added = 0;
            // Group remaining instances into test sets whose members are pairwise 'plausible';
            // each group is compared against its first element only.
            final ArrayList<Instances> tests = new ArrayList<Instances>();
            while (instance_counter < single.size() && Ntest_added < Ntest) {
                final Instance inst = single.get(idx[instance_counter++]);
                boolean found = false;
                for (final Instances test : tests) {
                    // Note that 'plausible_p' should be transitive
                    if (plausible_p.apply(inst, test.get(0))) {
                        WekaUtil.addInstance(test, inst);
                        // A group only starts counting toward Ntest once it reaches 30 members.
                        if (test.size() == 30) {
                            Ntest_added += test.size();
                        } else if (test.size() > 30) {
                            Ntest_added += 1;
                        }
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    final Instances test = new Instances(single, 0);
                    WekaUtil.addInstance(test, inst);
                    tests.add(test);
                }
            }
            // Discard groups that never reached 30 members.
            final Iterator<Instances> test_itr = tests.iterator();
            while (test_itr.hasNext()) {
                if (test_itr.next().size() < 30) {
                    test_itr.remove();
                }
            }
            System.out.println("=== tests.size() = " + tests.size());
            System.out.println("=== Ntest_added = " + Ntest_added);
            System.out.println("[Training]");
            final Evaluator evaluator = createEvaluator(config, train);
            // final Instances transformed_test = evaluator.prepareInstances( test );
            System.out.println("[Evaluating]");
            // Order-sensitive evaluators get 10 shuffled repetitions; others run once.
            final int Nxval = evaluator.isSensitiveToOrdering() ? 10 : 1;
            final MeanVarianceAccumulator ami = new MeanVarianceAccumulator();
            final MeanVarianceAccumulator errors = new MeanVarianceAccumulator();
            final MeanVarianceAccumulator relative_error = new MeanVarianceAccumulator();
            int c = 0;
            for (int xval = 0; xval < Nxval; ++xval) {
                for (final Instances test : tests) {
                    // TODO: Debugging
                    WekaUtil.writeDataset(new File(config.root_directory), "test_" + (c++), test);
                    // transformed_test.randomize( new RandomAdaptor( config.rng ) );
                    // final ClusterContingencyTable ct = evaluator.evaluate( transformed_test );
                    test.randomize(new RandomAdaptor(config.rng));
                    final ClusterContingencyTable ct = evaluator.evaluate(test);
                    System.out.println(ct);
                    // Per-row error: points outside the row's largest column are counted as errors.
                    int Nerrors = 0;
                    final MeanVarianceAccumulator mv = new MeanVarianceAccumulator();
                    for (int i = 0; i < ct.R; ++i) {
                        final int max = Fn.max(ct.n[i]);
                        Nerrors += (ct.a[i] - max);
                        // NOTE(review): uses the running Nerrors total, not the per-row error —
                        // confirm this weighting is intended.
                        mv.add(((double) ct.a[i]) / ct.N * Nerrors / ct.a[i]);
                    }
                    errors.add(Nerrors);
                    relative_error.add(mv.mean());
                    System.out.println("exemplar: " + test.get(0));
                    System.out.println("Nerrors = " + Nerrors);
                    final PrintStream ct_out = new PrintStream(
                            new FileOutputStream(new File(expr_directory, "ct_" + expr + "_" + xval + ".csv")));
                    ct.writeCsv(ct_out);
                    ct_out.close();
                    final double ct_ami = ct.adjustedMutualInformation_max();
                    if (Double.isNaN(ct_ami)) {
                        System.out.println("! ct_ami = NaN");
                    } else {
                        ami.add(ct_ami);
                    }
                    System.out.println();
                }
            }
            System.out.println("errors = " + errors.mean() + " (" + errors.confidence() + ")");
            System.out.println(
                    "relative_error = " + relative_error.mean() + " (" + relative_error.confidence() + ")");
            System.out.println("AMI_max = " + ami.mean() + " (" + ami.confidence() + ")");
            csv.cell(config.domain).cell(config.get("abstraction.discovery"));
            for (final String p : parameter_headers) {
                csv.cell(config.get(p));
            }
            csv.cell(Ntrain).cell(Ntest).cell(ami.mean()).cell(ami.variance()).cell(ami.confidence()).newline();
        } catch (final Exception ex) {
            // Best-effort: a failed experiment is logged and its row omitted from results.csv.
            ex.printStackTrace();
        }
    }
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.Experiments.java
License:Open Source License
/** * Constructs an InformationTheoreticMetricLearner from a set of labeled * state vector differences.// w w w .j a v a 2 s .co m * * @param config * @param A0 * @param XL A labeled set of state vector differences. The label must be * the last attribute, and it must be 1 if the states are similar and 0 * if they are not. * @return */ private static <A> InformationTheoreticMetricLearner learnMetric(final Configuration config, final RealMatrix A0, final Instances XL) { final int d = XL.numAttributes() - 1; //XL.get( 0 ).getDimension(); System.out.println("d = " + d); final double u; final double ell; final double gamma = config.getDouble("itml.gamma"); // We will set 'ell' and 'u' using sample quantiles as described in // the ITML paper. final QuantileAccumulator qacc = new QuantileAccumulator(0.05, 0.95); final ArrayList<double[]> S = new ArrayList<double[]>(); final ArrayList<double[]> D = new ArrayList<double[]>(); for (int i = 0; i < XL.size(); ++i) { final Instance ii = XL.get(i); final double diff[] = new double[d]; for (int j = 0; j < d; ++j) { diff[j] = ii.value(j); } if (ii.classValue() == 0.0) { D.add(diff); } else { S.add(diff); } qacc.add(Math.sqrt(HilbertSpace.inner_prod(diff, A0, diff))); } // Set bounds to quantile esimates ell = qacc.estimates[0]; u = qacc.estimates[1]; System.out.println("ITML: ell = " + ell); System.out.println("ITML: u = " + u); final InformationTheoreticMetricLearner itml = new InformationTheoreticMetricLearner(S, D, u, ell, A0, gamma, config.rng); itml.run(); return itml; }
From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java
License:Open Source License
/**
 * Builds a pairwise dataset from all ordered pairs (i, j), i &lt; j, of 'single',
 * reservoir-sampling at most 'max_pairwise_instances' positives (same class,
 * label 1) and the same number of negatives (different class, label 0).
 *
 * @param rng randomness source for the reservoir samplers.
 * @param max_pairwise_instances cap on each of the positive and negative samples.
 * @param single the labeled source dataset.
 * @return sampled negatives followed by sampled positives.
 */
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> ArrayList<PairInstance> makePairDataset(
        final RandomGenerator rng, final int max_pairwise_instances, final Instances single) {
    final ReservoirSampleAccumulator<PairInstance> negative = new ReservoirSampleAccumulator<PairInstance>(rng,
            max_pairwise_instances);
    final ReservoirSampleAccumulator<PairInstance> positive = new ReservoirSampleAccumulator<PairInstance>(rng,
            max_pairwise_instances);
    for (int i = 0; i < single.size(); ++i) {
        for (int j = i + 1; j < single.size(); ++j) {
            final Instance ii = single.get(i);
            final Instance ij = single.get(j);
            // Same class => positive pair (label 1); otherwise negative (label 0).
            final boolean same_class = ii.classValue() == ij.classValue();
            final ReservoirSampleAccumulator<PairInstance> pool = same_class ? positive : negative;
            final int label = same_class ? 1 : 0;
            if (pool.acceptNext()) {
                pool.addPending(new PairInstance(ii.toDoubleArray(), ij.toDoubleArray(), label));
            }
        }
    }
    // Negatives first, then positives, matching the accumulation order above.
    final ArrayList<PairInstance> result = new ArrayList<PairInstance>(negative.n() + positive.n());
    result.addAll(negative.samples());
    result.addAll(positive.samples());
    return result;
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java
License:Open Source License
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> Instances makePairDataset( final RandomGenerator rng, final int max_pairwise_instances, final Instances single, final InstanceCombiner combiner) { // final int max_pairwise = config.getInt( "training.max_pairwise" ); final ReservoirSampleAccumulator<Instance> negative = new ReservoirSampleAccumulator<Instance>(rng, max_pairwise_instances);/*from w w w.j a va2 s. c o m*/ final ReservoirSampleAccumulator<Instance> positive = new ReservoirSampleAccumulator<Instance>(rng, max_pairwise_instances); for (int i = 0; i < single.size(); ++i) { // if( i % 100 == 0 ) { // System.out.println( "i = " + i ); // } for (int j = i + 1; j < single.size(); ++j) { final Instance ii = single.get(i); final Instance ij = single.get(j); final int label; if (ii.classValue() == ij.classValue()) { label = 1; if (positive.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); positive.addPending(pair_instance); } } else { label = 0; if (negative.acceptNext()) { final Instance pair_instance = combiner.apply(ii, ij, label); negative.addPending(pair_instance); } } } } final int N = Math.min(negative.samples().size(), positive.samples().size()); final String dataset_name = "train_" + combiner.keyword() + "_" + max_pairwise_instances; final Instances x = new Instances(dataset_name, combiner.attributes(), 2 * N); x.setClassIndex(x.numAttributes() - 1); for (final Instance ineg : negative.samples()) { x.add(ineg); } for (final Instance ipos : positive.samples()) { x.add(ipos); } return x; // return new PairDataset( x, combiner ); }
From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java
License:Open Source License
/**
 * Constructs one positive pair and one negative pair involving each
 * data point in 'single'.
 *
 * @param rng randomness source for the reservoir samplers.
 * @param negative_per_instance negative sample budget per source instance.
 * @param positive_per_instance positive sample budget per source instance.
 * @param single the labeled source dataset.
 * @param combiner turns two instances plus a label into one pair instance.
 * @return a PairDataset whose 'matches' list records the (i, j) source indices
 *         of each pair, negatives first.
 */
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> PairDataset makeBalancedPairDataset(
        final RandomGenerator rng, final int negative_per_instance, final int positive_per_instance,
        final Instances single, final InstanceCombiner combiner) {
    final int Nnegative = negative_per_instance * single.size();
    final int Npositive = positive_per_instance * single.size();
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> negative = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Nnegative);
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> positive = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Npositive);
    // Reservoir-sample over all pairs (i, j), i < j; each sample also carries
    // its source indices so callers can trace pairs back to 'single'.
    for (int i = 0; i < single.size(); ++i) {
        for (int j = i + 1; j < single.size(); ++j) {
            final Instance ii = single.get(i);
            final Instance ij = single.get(j);
            final int label;
            if (ii.classValue() == ij.classValue()) {
                // Same class => positive pair.
                label = 1;
                if (positive.acceptNext()) {
                    final Instance pair_instance = combiner.apply(ii, ij, label);
                    positive.addPending(Pair.makePair(pair_instance, new int[] { i, j }));
                }
            } else {
                // Different class => negative pair.
                label = 0;
                if (negative.acceptNext()) {
                    final Instance pair_instance = combiner.apply(ii, ij, label);
                    negative.addPending(Pair.makePair(pair_instance, new int[] { i, j }));
                }
            }
        }
    }
    // NOTE(review): 'N' is computed but never used; the dataset below contains
    // ALL sampled pairs, so it is only balanced if both reservoirs filled.
    final int N = Math.min(negative.samples().size(), positive.samples().size());
    final String dataset_name = "train_" + combiner.keyword() + "_" + Nnegative + "x" + Npositive;
    final Instances x = new Instances(dataset_name, combiner.attributes(), Nnegative + Npositive);
    x.setClassIndex(x.numAttributes() - 1);
    final ArrayList<int[]> matches = new ArrayList<int[]>();
    for (final Pair<Instance, int[]> ineg : negative.samples()) {
        WekaUtil.addInstance(x, ineg.first);
        matches.add(ineg.second);
    }
    for (final Pair<Instance, int[]> ipos : positive.samples()) {
        WekaUtil.addInstance(x, ipos.first);
        matches.add(ipos.second);
    }
    return new PairDataset(x, matches, combiner);
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java
License:Open Source License
/**
 * Constructs one positive pair and one negative pair involving each
 * data point in 'single', restricted to pairs that satisfy 'plausible_p'.
 *
 * @param rng randomness source for the reservoir samplers.
 * @param negative_per_instance negative sample budget per source instance.
 * @param positive_per_instance positive sample budget per source instance.
 * @param single the labeled source dataset.
 * @param combiner turns two instances plus a label into one pair instance.
 * @param plausible_p pairs failing this predicate are skipped entirely.
 * @return a PairDataset whose 'matches' list records the (i, j) source indices
 *         of each pair, negatives first.
 */
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> PairDataset makePlausiblePairDataset(
        final RandomGenerator rng, final int negative_per_instance, final int positive_per_instance,
        final Instances single, final InstanceCombiner combiner,
        final Fn.Function2<Boolean, Instance, Instance> plausible_p) {
    final int Nnegative = negative_per_instance * single.size();
    final int Npositive = positive_per_instance * single.size();
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> negative = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Nnegative);
    final ReservoirSampleAccumulator<Pair<Instance, int[]>> positive = new ReservoirSampleAccumulator<Pair<Instance, int[]>>(
            rng, Npositive);
    // Reservoir-sample over all plausible pairs (i, j), i < j.
    for (int i = 0; i < single.size(); ++i) {
        for (int j = i + 1; j < single.size(); ++j) {
            final Instance ii = single.get(i);
            final Instance ij = single.get(j);
            // Implausible pairs are excluded from both reservoirs.
            if (!plausible_p.apply(ii, ij)) {
                continue;
            }
            final int label;
            if (ii.classValue() == ij.classValue()) {
                // Same class => positive pair.
                label = 1;
                if (positive.acceptNext()) {
                    final Instance pair_instance = combiner.apply(ii, ij, label);
                    positive.addPending(Pair.makePair(pair_instance, new int[] { i, j }));
                }
            } else {
                // Different class => negative pair.
                label = 0;
                if (negative.acceptNext()) {
                    final Instance pair_instance = combiner.apply(ii, ij, label);
                    negative.addPending(Pair.makePair(pair_instance, new int[] { i, j }));
                }
            }
        }
    }
    // NOTE(review): 'N' is computed but never used; all sampled pairs are added,
    // so the result is only balanced if both reservoirs filled.
    final int N = Math.min(negative.samples().size(), positive.samples().size());
    final String dataset_name = "train_" + combiner.keyword() + "_" + negative.samples().size() + "x"
            + positive.samples().size();
    final Instances x = new Instances(dataset_name, combiner.attributes(), Nnegative + Npositive);
    x.setClassIndex(x.numAttributes() - 1);
    final ArrayList<int[]> matches = new ArrayList<int[]>();
    for (final Pair<Instance, int[]> ineg : negative.samples()) {
        WekaUtil.addInstance(x, ineg.first);
        matches.add(ineg.second);
    }
    for (final Pair<Instance, int[]> ipos : positive.samples()) {
        WekaUtil.addInstance(x, ipos.first);
        matches.add(ipos.second);
    }
    return new PairDataset(x, matches, combiner);
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/** * Classify a feature vector that is not part of an Instances object. * @param classifier/*w w w . j a v a2s.c om*/ * @param attributes * @param features * @return */ public static double classify(final Classifier classifier, final List<Attribute> attributes, final double[] features) { final Instances x = createSingletonInstances(attributes, features); try { return classifier.classifyInstance(x.get(0)); } catch (final Exception ex) { throw new RuntimeException(ex); } }
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/**
 * Compute the class distribution for a feature vector that is not part of
 * an Instances object.
 *
 * @param classifier the trained Weka classifier to apply.
 * @param attributes attribute metadata describing 'features'.
 * @param features the raw feature vector.
 * @return the per-class probability distribution.
 * @throws RuntimeException wrapping any checked exception from the classifier.
 */
public static double[] distribution(final Classifier classifier, final List<Attribute> attributes,
        final double[] features) {
    // Wrap the bare vector in a one-row dataset so Weka can consume it.
    final Instances singleton = createSingletonInstances(attributes, features);
    final Instance row = singleton.get(0);
    try {
        return classifier.distributionForInstance(row);
    } catch (final Exception ex) {
        throw new RuntimeException(ex);
    }
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/**
 * Splits a labeled dataset into a list of feature vectors (class attribute
 * removed) and a parallel array of integer class labels.
 *
 * @param train labeled dataset; must have a class attribute set.
 * @return pair of (feature vectors X, labels Y), index-aligned with 'train'.
 */
public static Pair<ArrayList<double[]>, int[]> splitLabels(final Instances train) {
    assert (train.classAttribute() != null);
    final ArrayList<double[]> features = new ArrayList<double[]>();
    final int[] labels = new int[train.size()];
    for (int i = 0; i < train.size(); ++i) {
        final Instance inst = train.get(i);
        // One slot fewer than the attribute count: the class attribute is split out.
        final double[] row = new double[train.numAttributes() - 1];
        int col = 0;
        for (int j = 0; j < train.numAttributes(); ++j) {
            if (j == train.classIndex()) {
                labels[i] = (int) inst.classValue();
                continue;
            }
            row[col++] = inst.value(j);
        }
        features.add(row);
    }
    return Pair.makePair(features, labels);
}
From source file:edu.oregonstate.eecs.mcplan.ml.WekaGlue.java
License:Open Source License
/**
 * Builds a SequentialProjectionHashLearner from labeled and unlabeled data.
 * Columns of X hold all instances (labeled first, then unlabeled); XL holds the
 * labeled instances; S pairs each labeled instance with one randomly chosen
 * same-class instance (+1) and one different-class instance (-1).
 *
 * @param rng source of randomness for the similar/dissimilar pair selection.
 * @param labeled labeled dataset; class index must be set.
 * @param unlabeled unlabeled dataset with the same feature attributes.
 * @param K passed through to the learner.
 * @param eta passed through to the learner.
 * @param alpha passed through to the learner.
 * @return the constructed (untrained) learner.
 */
public static SequentialProjectionHashLearner createSequentialProjectionHashLearner(final RandomGenerator rng,
        final Instances labeled, final Instances unlabeled, final int K, final double eta, final double alpha) {
    assert (labeled.classIndex() >= 0);
    // Last attribute is the class; the rest are features.
    final int Nfeatures = labeled.numAttributes() - 1;
    final RealMatrix X = new Array2DRowRealMatrix(Nfeatures, labeled.size() + unlabeled.size());
    // NOTE(review): XL is allocated with 2 * labeled.size() columns but only the
    // first labeled.size() are ever written below — confirm this is intended.
    final RealMatrix XL = new Array2DRowRealMatrix(Nfeatures, labeled.size() * 2);
    final RealMatrix S = new Array2DRowRealMatrix(XL.getColumnDimension(), XL.getColumnDimension());
    for (int j = 0; j < labeled.size(); ++j) {
        final Instance inst = labeled.get(j);
        // Copy the feature values of instance j into column j of both X and XL.
        for (int i = 0; i < XL.getRowDimension(); ++i) {
            X.setEntry(i, j, inst.value(i));
            XL.setEntry(i, j, inst.value(i));
        }
        // Rejection-sample a different instance with the SAME class => S[j][sj] = 1.
        int sj = -1;
        Instance s = null;
        do {
            sj = rng.nextInt(labeled.size());
            s = labeled.get(sj);
        } while (s == inst || s.classValue() != inst.classValue());
        S.setEntry(j, sj, 1);
        // Rejection-sample a different instance with a DIFFERENT class => S[j][dj] = -1.
        int dj = -1;
        Instance d = null;
        do {
            dj = rng.nextInt(labeled.size());
            d = labeled.get(dj);
        } while (d == inst || d.classValue() == inst.classValue());
        S.setEntry(j, dj, -1);
    }
    // Append the unlabeled instances after the labeled columns of X.
    for (int j = 0; j < unlabeled.size(); ++j) {
        final Instance inst = unlabeled.get(j);
        for (int i = 0; i < X.getRowDimension(); ++i) {
            X.setEntry(i, labeled.size() + j, inst.value(i));
        }
    }
    return new SequentialProjectionHashLearner(X, XL, S, K, eta, alpha);
}