List of usage examples for weka.core Instances setClassIndex
public void setClassIndex(int classIndex)
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/** * Sets the format of the filtered instances that are output. I.e. will * include k attributes each shapelet distance and a class value * * @param inputFormat the format of the input data * @return a new Instances object in the desired output format * @throws Exception if all required parameters of the filter are not * initialised correctly//from w ww . j a va 2s . com */ @Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { if (this.numShapelets < 1) { throw new Exception( "ShapeletFilter not initialised correctly - please specify a value of k that is greater than or equal to 1"); } //Set up instances size and format. //int length = this.numShapelets; int length = this.shapelets.size(); FastVector atts = new FastVector(); String name; for (int i = 0; i < length; i++) { name = "Shapelet_" + i; atts.addElement(new Attribute(name)); } if (inputFormat.classIndex() >= 0) { //Classification set, set class //Get the class values as a fast vector Attribute target = inputFormat.attribute(inputFormat.classIndex()); FastVector vals = new FastVector(target.numValues()); for (int i = 0; i < target.numValues(); i++) { vals.addElement(target.value(i)); } atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals)); } Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts, inputFormat.numInstances()); if (inputFormat.classIndex() >= 0) { result.setClassIndex(result.numAttributes() - 1); } return result; }
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifier(WekaClassifier wekaClassifier, Dataset trainDataset, Dataset testDataset) throws Exception { Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the train instances and get the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + trainDataset.toString() + ".arff", "-o", MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff" }); Instances train = DataSource.read(MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff"); train.setClassIndex(train.numAttributes() - 1); // Add IDs to the test instances and get the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + testDataset.toString() + ".arff", "-o", MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff" }); Instances test = DataSource.read(MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff"); test.setClassIndex(test.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data test.randomize(random);//from w w w. j a v a2s. 
c om // Apply log filter // Filter logFilter = new LogFilter(); // logFilter.setInputFormat(train); // train = Filter.useFilter(train, logFilter); // logFilter.setInputFormat(test); // test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Prepare the output buffer AbstractOutput output = new PlainText(); output.setBuffer(new StringBuffer()); output.setHeader(test); output.setAttributes("first"); Evaluation eval = new Evaluation(train); eval.evaluateModel(filteredClassifier, test, output); // Convert predictions to CSV // Format: inst#, actual, predicted, error, probability, (ID) String[] scores = new String[new Double(eval.numInstances()).intValue()]; double[] probabilities = new double[new Double(eval.numInstances()).intValue()]; for (String line : output.getBuffer().toString().split("\n")) { String[] linesplit = line.split("\\s+"); // If there's been an error, the length of linesplit is 6, otherwise 5, // due to the error flag "+" int id; String expectedValue, classification; double probability; if (line.contains("+")) { id = Integer.parseInt(linesplit[6].substring(1, linesplit[6].length() - 1)); expectedValue = linesplit[2].substring(2); classification = linesplit[3].substring(2); probability = Double.parseDouble(linesplit[5]); } else { id = Integer.parseInt(linesplit[5].substring(1, linesplit[5].length() - 1)); expectedValue = linesplit[2].substring(2); classification = linesplit[3].substring(2); probability = Double.parseDouble(linesplit[4]); } scores[id - 1] = classification; probabilities[id - 1] = probability; } System.out.println(eval.toSummaryString()); System.out.println(eval.toMatrixString()); // Output 
classifications StringBuilder sb = new StringBuilder(); for (String score : scores) sb.append(score.toString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".csv"), sb.toString()); // Output probabilities sb = new StringBuilder(); for (Double probability : probabilities) sb.append(probability.toString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".probabilities.csv"), sb.toString()); // Output predictions FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".predictions.txt"), output.getBuffer().toString()); // Output meta information sb = new StringBuilder(); sb.append(classifier.toString() + LF); sb.append(eval.toSummaryString() + LF); sb.append(eval.toMatrixString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".meta.txt"), sb.toString()); }
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception { // Set parameters int folds = 10; Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o", MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" }); Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff"); data.setClassIndex(data.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data data.randomize(random);//from w ww . j a v a2 s.c o m // Perform cross-validation Instances predictedData = null; Evaluation eval = new Evaluation(data); for (int n = 0; n < folds; n++) { Instances train = data.trainCV(folds, n, random); Instances test = data.testCV(folds, n); // Apply log filter // Filter logFilter = new LogFilter(); // logFilter.setInputFormat(train); // train = Filter.useFilter(train, logFilter); // logFilter.setInputFormat(test); // test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Evaluate eval.evaluateModel(filteredClassifier, test); // Add predictions AddClassification filter = new AddClassification(); filter.setClassifier(classifier); filter.setOutputClassification(true); filter.setOutputDistribution(false); filter.setOutputErrorFlag(true); filter.setInputFormat(train); Filter.useFilter(train, filter); // trains the classifier 
Instances pred = Filter.useFilter(test, filter); // performs predictions on test set if (predictedData == null) predictedData = new Instances(pred, 0); for (int j = 0; j < pred.numInstances(); j++) predictedData.add(pred.instance(j)); } System.out.println(eval.toSummaryString()); System.out.println(eval.toMatrixString()); // Prepare output scores String[] scores = new String[predictedData.numInstances()]; for (Instance predInst : predictedData) { int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1; int valueIdx = predictedData.numAttributes() - 2; String value = predInst.stringValue(predInst.attribute(valueIdx)); scores[id] = value; } // Output classifications StringBuilder sb = new StringBuilder(); for (String score : scores) sb.append(score.toString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/" + dataset.toString() + ".csv"), sb.toString()); // Output prediction arff DataSink.write(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/" + dataset.toString() + ".predicted.arff", predictedData); // Output meta information sb = new StringBuilder(); sb.append(baseClassifier.toString() + LF); sb.append(eval.toSummaryString() + LF); sb.append(eval.toMatrixString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/" + dataset.toString() + ".meta.txt"), sb.toString()); }
From source file:dkpro.similarity.experiments.sts2013.util.Evaluator.java
License:Open Source License
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception { for (Dataset dataset : datasets) { // Set parameters int folds = 10; Classifier baseClassifier = new LinearRegression(); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff" }); Instances data = DataSource.read( MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff"); data.setClassIndex(data.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data data.randomize(random);/*from ww w . java2 s. c om*/ // Perform cross-validation Instances predictedData = null; Evaluation eval = new Evaluation(data); for (int n = 0; n < folds; n++) { Instances train = data.trainCV(folds, n, random); Instances test = data.testCV(folds, n); // Apply log filter Filter logFilter = new LogFilter(); logFilter.setInputFormat(train); train = Filter.useFilter(train, logFilter); logFilter.setInputFormat(test); test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Evaluate eval.evaluateModel(classifier, test); // Add predictions AddClassification filter = new AddClassification(); filter.setClassifier(classifier); filter.setOutputClassification(true); filter.setOutputDistribution(false); filter.setOutputErrorFlag(true); 
filter.setInputFormat(train); Filter.useFilter(train, filter); // trains the classifier Instances pred = Filter.useFilter(test, filter); // performs predictions on test set if (predictedData == null) { predictedData = new Instances(pred, 0); } for (int j = 0; j < pred.numInstances(); j++) { predictedData.add(pred.instance(j)); } } // Prepare output scores double[] scores = new double[predictedData.numInstances()]; for (Instance predInst : predictedData) { int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1; int valueIdx = predictedData.numAttributes() - 2; double value = predInst.value(predInst.attribute(valueIdx)); scores[id] = value; // Limit to interval [0;5] if (scores[id] > 5.0) { scores[id] = 5.0; } if (scores[id] < 0.0) { scores[id] = 0.0; } } // Output StringBuilder sb = new StringBuilder(); for (Double score : scores) { sb.append(score.toString() + LF); } FileUtils.writeStringToFile( new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"), sb.toString()); } }
From source file:dkpro.similarity.experiments.sts2013baseline.util.Evaluator.java
License:Open Source License
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception { for (Dataset dataset : datasets) { // Set parameters int folds = 10; Classifier baseClassifier = new LinearRegression(); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff" }); String location = MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff"; Instances data = DataSource.read(location); if (data == null) { throw new IOException("Could not load data from: " + location); }/* w w w .j a va 2 s . co m*/ data.setClassIndex(data.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data data.randomize(random); // Perform cross-validation Instances predictedData = null; Evaluation eval = new Evaluation(data); for (int n = 0; n < folds; n++) { Instances train = data.trainCV(folds, n, random); Instances test = data.testCV(folds, n); // Apply log filter Filter logFilter = new LogFilter(); logFilter.setInputFormat(train); train = Filter.useFilter(train, logFilter); logFilter.setInputFormat(test); test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Evaluate eval.evaluateModel(classifier, test); // Add predictions AddClassification filter = new AddClassification(); filter.setClassifier(classifier); 
filter.setOutputClassification(true); filter.setOutputDistribution(false); filter.setOutputErrorFlag(true); filter.setInputFormat(train); Filter.useFilter(train, filter); // trains the classifier Instances pred = Filter.useFilter(test, filter); // performs predictions on test set if (predictedData == null) { predictedData = new Instances(pred, 0); } for (int j = 0; j < pred.numInstances(); j++) { predictedData.add(pred.instance(j)); } } // Prepare output scores double[] scores = new double[predictedData.numInstances()]; for (Instance predInst : predictedData) { int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1; int valueIdx = predictedData.numAttributes() - 2; double value = predInst.value(predInst.attribute(valueIdx)); scores[id] = value; // Limit to interval [0;5] if (scores[id] > 5.0) { scores[id] = 5.0; } if (scores[id] < 0.0) { scores[id] = 0.0; } } // Output StringBuilder sb = new StringBuilder(); for (Double score : scores) { sb.append(score.toString() + LF); } FileUtils.writeStringToFile( new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"), sb.toString()); } }
From source file:edu.cmu.cs.in.hoop.hoops.analyze.HoopWekaML.java
License:Open Source License
/**
 * Constructs the Weka machine-learning hoop: configures the hoop metadata,
 * sets up an unpruned J48 decision tree, declares a small example attribute
 * set (two numerics, one nominal, one class attribute), and builds the tree
 * on an empty training set.
 */
public HoopWekaML() {
    setClassName("HoopWekaML");
    debug("HoopWekaML ()");

    removeOutPort("KV");

    setHoopDescription("Run Weka Machine Learning");

    // "-U": build an unpruned tree.
    String[] treeOptions = new String[1];
    treeOptions[0] = "-U";

    J48 tree = new J48(); // new instance of tree
    try {
        tree.setOptions(treeOptions);
    } catch (Exception e1) {
        e1.printStackTrace();
    }

    // Nominal attribute together with its admissible values.
    FastVector colorValues = new FastVector(3);
    colorValues.addElement("blue");
    colorValues.addElement("gray");
    colorValues.addElement("black");

    // Class attribute together with its admissible values.
    FastVector classValues = new FastVector(2);
    classValues.addElement("positive");
    classValues.addElement("negative");
    Attribute classAttribute = new Attribute("theClass", classValues);

    // Feature vector: two numerics, the nominal, then the class attribute.
    FastVector featureVector = new FastVector(4);
    featureVector.addElement(new Attribute("firstNumeric"));
    featureVector.addElement(new Attribute("secondNumeric"));
    featureVector.addElement(new Attribute("aNominal", colorValues));
    featureVector.addElement(classAttribute);

    // Empty training set (capacity hint 10); class is the last attribute.
    Instances trainingSet = new Instances("Rel", featureVector, 10);
    trainingSet.setClassIndex(3);

    try {
        tree.buildClassifier(trainingSet);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java
License:Open Source License
/** * Assigns a class attribute to a weka Instances object. * <p/>/* ww w . j av a 2 s . c om*/ * If no class attribute is given, or if the class attribute is not found in the list of attributes, the last * attribute is set to the class attribute. * * @param instances the instances object * @param class_attribute the desired class attribute. */ static void setWekaClassAttribute(Instances instances, String class_attribute) { if (class_attribute != null) { int i = 0; boolean set = false; while (i < instances.numAttributes() && !set) { Attribute attr = instances.attribute(i); if (class_attribute.equals(attr.name())) { instances.setClassIndex(i); set = true; } ++i; } if (!set) { instances.setClassIndex(instances.numAttributes() - 1); } } else { instances.setClassIndex(instances.numAttributes() - 1); } }
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
/**
 * Projects every instance of {@code src} through the given coordinate
 * transform, producing a new dataset whose attributes are the transformed
 * coordinates x0..x(d-1) followed by a copy of the source class attribute.
 *
 * @param src       the dataset to transform
 * @param transform the coordinate transform applied to the feature vector
 * @return a new Instances object holding the transformed instances, with the
 *         class index set to the last attribute
 */
public static Instances transformInstances(final Instances src, final CoordinateTransform transform) {
    // One numeric attribute per output dimension, then the class attribute.
    final ArrayList<Attribute> attrs = new ArrayList<Attribute>();
    for (int d = 0; d < transform.outDimension(); ++d) {
        attrs.add(new Attribute("x" + d));
    }
    attrs.add((Attribute) src.classAttribute().copy());

    final Instances result = new Instances(src.relationName() + "_" + transform.name(), attrs, 0);
    for (int i = 0; i < src.size(); ++i) {
        final Instance original = src.get(i);
        // Encode the unlabeled feature vector, then re-attach the class value
        // as the final element.
        final RealVector encoded = transform
                .encode(new ArrayRealVector(WekaUtil.unlabeledFeatures(original))).x;
        final double[] values = new double[encoded.getDimension() + 1];
        for (int d = 0; d < encoded.getDimension(); ++d) {
            values[d] = encoded.getEntry(d);
        }
        values[values.length - 1] = original.classValue();
        final Instance copy = new DenseInstance(original.weight(), values);
        result.add(copy);
        copy.setDataset(result);
    }
    result.setClassIndex(result.numAttributes() - 1);
    return result;
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.Experiments.java
License:Open Source License
/**
 * Creates a labeled dataset of states paired with optimal actions. Action
 * labels are represented as indexes into an array list. Mappings in both
 * directions are also returned.
 *
 * @param config     configuration; supplies the rng and the
 *                   training.max_per_label / training.max_single limits
 * @param attributes unlabeled feature attributes
 * @param data       feature vectors, parallel to {@code labels}
 * @param labels     optimal action for each feature vector
 * @param qtable     optional per-state Q-table rows, parallel to {@code data};
 *                   may be null
 * @param iter       iteration index used to derive the dataset name
 * @return the labeled dataset plus the action-to-index and index-to-action
 *         mappings (and the abridged Q-table when one was supplied)
 */
private static <A extends VirtualConstructor<A>> SingleInstanceDataset<A> makeSingleInstanceDataset(
        final Configuration config, final ArrayList<Attribute> attributes, final ArrayList<double[]> data,
        final ArrayList<A> labels, final ArrayList<Pair<ArrayList<A>, TDoubleList>> qtable, final int iter) {
    // Visit the data in a shuffled order so the per-label caps below sample
    // uniformly rather than favoring early entries.
    final int[] ii = Fn.range(0, data.size());
    Fn.shuffle(config.rng, ii);
    final HashMap<A, Integer> action_to_int = new HashMap<A, Integer>();
    final ArrayList<A> int_to_action = new ArrayList<A>();
    // Only built when a qtable was supplied; holds the rows for kept instances.
    final ArrayList<Pair<ArrayList<A>, TDoubleList>> abridged_qtable = (qtable != null
            ? new ArrayList<Pair<ArrayList<A>, TDoubleList>>()
            : null);
    // counts.get(label) = number of instances kept so far for that label.
    final TIntArrayList counts = new TIntArrayList();
    final int max_per_label = config.getInt("training.max_per_label");
    final int max_instances = config.getInt("training.max_single");
    final ArrayList<DenseInstance> instance_list = new ArrayList<DenseInstance>();
    for (int i = 0; i < Math.min(data.size(), max_instances); ++i) {
        final int idx = ii[i];
        final A a = labels.get(idx);
        final Integer idx_obj = action_to_int.get(a);
        final int label;
        if (idx_obj == null) {
            // First time this action is seen: assign it the next integer label.
            label = int_to_action.size();
            int_to_action.add(a);
            action_to_int.put(a, label);
            counts.add(0);
        } else {
            label = idx_obj;
        }
        final int c = counts.get(label);
        // Keep the instance only while the per-label cap has not been reached
        // (max_per_label <= 0 disables the cap entirely).
        if (max_per_label <= 0 || c < max_per_label) {
            // Feature vector with the integer label appended as the last value.
            final double[] phi = Fn.append(data.get(idx), label);
            final DenseInstance instance = new DenseInstance(1.0, phi);
            instance_list.add(instance);
            counts.set(label, c + 1);
            if (qtable != null) {
                abridged_qtable.add(qtable.get(idx));
            }
        }
    }
    final int Nlabels = int_to_action.size();
    final ArrayList<Attribute> labeled_attributes = addLabelToAttributes(attributes, Nlabels);
    // counts.sum() equals the number of kept instances, used as capacity hint.
    final Instances instances = new Instances(deriveDatasetName(config.training_data_single, iter),
            labeled_attributes, counts.sum());
    instances.setClassIndex(instances.numAttributes() - 1);
    for (final DenseInstance instance : instance_list) {
        instances.add(instance);
        instance.setDataset(instances);
    }
    return new SingleInstanceDataset<A>(instances, action_to_int, int_to_action, abridged_qtable);
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.PairDataset.java
License:Open Source License
/**
 * Builds a pairwise training set from a single-instance dataset: every
 * unordered pair of instances is a candidate, labeled 1 when the two class
 * values agree and 0 otherwise. Reservoir sampling caps each label at
 * {@code max_pairwise_instances} pairs.
 *
 * @param rng                    random generator driving the reservoir samplers
 * @param max_pairwise_instances reservoir capacity per label
 * @param single                 the source dataset of labeled instances
 * @param combiner               combines two instances (plus a pair label)
 *                               into one pair instance
 * @return the pair dataset, class index set to the last attribute
 */
public static <S, X extends FactoredRepresentation<S>, A extends VirtualConstructor<A>> Instances makePairDataset(
        final RandomGenerator rng, final int max_pairwise_instances, final Instances single,
        final InstanceCombiner combiner) {
    final ReservoirSampleAccumulator<Instance> negative = new ReservoirSampleAccumulator<Instance>(rng,
            max_pairwise_instances);
    final ReservoirSampleAccumulator<Instance> positive = new ReservoirSampleAccumulator<Instance>(rng,
            max_pairwise_instances);
    // Enumerate all unordered pairs (i, j) with i < j.
    for (int i = 0; i < single.size(); ++i) {
        for (int j = i + 1; j < single.size(); ++j) {
            final Instance ii = single.get(i);
            final Instance ij = single.get(j);
            final int label;
            if (ii.classValue() == ij.classValue()) {
                label = 1;
                // Ask the reservoir first so the (possibly expensive) combine
                // only runs for pairs that will actually be stored.
                if (positive.acceptNext()) {
                    final Instance pair_instance = combiner.apply(ii, ij, label);
                    positive.addPending(pair_instance);
                }
            } else {
                label = 0;
                if (negative.acceptNext()) {
                    final Instance pair_instance = combiner.apply(ii, ij, label);
                    negative.addPending(pair_instance);
                }
            }
        }
    }
    // NOTE(review): N is used only as the Instances capacity hint below; the
    // loops add ALL samples from both reservoirs, so the result is not
    // actually balanced to N per class — confirm whether that is intended.
    final int N = Math.min(negative.samples().size(), positive.samples().size());
    final String dataset_name = "train_" + combiner.keyword() + "_" + max_pairwise_instances;
    final Instances x = new Instances(dataset_name, combiner.attributes(), 2 * N);
    x.setClassIndex(x.numAttributes() - 1);
    for (final Instance ineg : negative.samples()) {
        x.add(ineg);
    }
    for (final Instance ipos : positive.samples()) {
        x.add(ipos);
    }
    return x;
}