Usage examples for weka.core.Instance.weight()
public double weight();
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
public static Instances powerSet(final Instances D, final int n) { final Attribute class_attr = D.classAttribute(); final ImmutableSet.Builder<Integer> b = new ImmutableSet.Builder<Integer>(); final int Nattr = class_attr != null ? D.numAttributes() - 1 : D.numAttributes(); for (final int i : Fn.range(1, Nattr)) { b.add(i);// ww w . j a v a2 s. c o m } final Set<Set<Integer>> index = Sets.powerSet(b.build()); final ArrayList<Attribute> attributes = new ArrayList<Attribute>(); for (final Set<Integer> subset : index) { if (subset.isEmpty() || subset.size() > n) { continue; } final StringBuilder attr_name = new StringBuilder(); int count = 0; for (final Integer i : subset) { if (count++ > 0) { attr_name.append("_x_"); } attr_name.append(D.attribute(i).name()); } attributes.add(new Attribute(attr_name.toString())); } if (class_attr != null) { assert (class_attr.isNominal()); attributes.add(WekaUtil.createNominalAttribute(class_attr.name(), class_attr.numValues())); } final String Pname = "P" + n + "_" + D.relationName(); final Instances P = new Instances(Pname, attributes, 0); if (class_attr != null) { P.setClassIndex(attributes.size() - 1); } for (final Instance inst : D) { final double[] xp = new double[attributes.size()]; int idx = 0; for (final Set<Integer> subset : index) { if (subset.isEmpty() || subset.size() > n) { continue; } double p = 1.0; for (final Integer i : subset) { p *= inst.value(i); } xp[idx++] = p; } if (class_attr != null) { xp[idx++] = inst.classValue(); } WekaUtil.addInstance(P, new DenseInstance(inst.weight(), xp)); } return P; }
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
public static Instances allPairwiseProducts(final Instances single, final boolean reflexive, final boolean symmetric) { final int c = single.classIndex(); System.out.println("Class attribute = " + c); final ArrayList<Attribute> pair_attributes = new ArrayList<Attribute>(); for (int i = 0; i < single.numAttributes(); ++i) { if (i == c) { continue; }//from w w w .j a v a2s .com final Attribute ai = single.attribute(i); final int j0 = (symmetric ? 0 : i); for (int j = j0; j < single.numAttributes(); ++j) { if (j == c) { continue; } if (!reflexive && i == j) { continue; } final Attribute aj = single.attribute(j); final String name = ai.name() + "_x_" + aj.name(); pair_attributes.add(new Attribute(name)); } } String pair_name = single.relationName(); pair_name += "_x"; if (reflexive) { pair_name += "r"; } if (symmetric) { pair_name += "s"; } pair_name += "_"; pair_name += single.relationName(); final Instances result = new Instances(pair_name, pair_attributes, 0); for (final Instance inst : single) { final double[] xp = new double[pair_attributes.size()]; int idx = 0; for (int i = 0; i < single.numAttributes(); ++i) { if (i == c) { continue; } final double xi = inst.value(i); final int j0 = (symmetric ? 0 : i); for (int j = j0; j < single.numAttributes(); ++j) { if (j == c) { continue; } if (!reflexive && i == j) { continue; } final double xj = inst.value(j); xp[idx++] = xi * xj; } } WekaUtil.addInstance(result, new DenseInstance(inst.weight(), xp)); } return result; }
From source file:edu.oregonstate.eecs.mcplan.domains.frogger.FroggerRepresentationConverter.java
License:Open Source License
public static Instances absoluteToRelative(final FroggerParameters params, final Instances src, final int vision) { final ArrayList<Attribute> attributes = new ArrayList<Attribute>(); attributes.add(new Attribute("x")); attributes.add(new Attribute("y")); for (int i = vision; i >= -vision; --i) { for (int j = -vision; j <= vision; ++j) { if (i == 0 && j == 0) { continue; }//from www . jav a 2s . co m final String name = "car_x" + (j >= 0 ? "+" : "") + j + "_y" + (i >= 0 ? "+" : "") + i; attributes.add(new Attribute(name)); } } attributes.add(src.classAttribute()); final Instances dest = new Instances(src.relationName() + "_relative", attributes, src.size()); for (final Instance inst : src) { final double[] phi = new double[attributes.size()]; int idx = 0; final int x = (int) inst.value(0); final int y = (int) inst.value(1); phi[idx++] = x; phi[idx++] = y; for (int i = vision; i >= -vision; --i) { for (int j = -vision; j <= vision; ++j) { if (i == 0 && j == 0) { continue; } final int xoff = x + j; final int yoff = y + i; if (xoff >= 0 && xoff < params.road_length && yoff >= 1 && yoff <= params.lanes) { final int car = (int) inst.value(2 + (yoff - 1) * params.road_length + xoff); phi[idx] = car; // s.grid[dy][dx] == Tile.Car ? 1.0 : 0.0; // fv[2 + (dy-1)*road_length + dx] } idx += 1; } } phi[idx++] = inst.classValue(); assert (idx == phi.length); WekaUtil.addInstance(dest, new DenseInstance(inst.weight(), phi)); } return dest; }
From source file:edu.oregonstate.eecs.mcplan.ml.GameTreeStateSimilarityDataset.java
License:Open Source License
@Override public void run() { System.out.println("*** Extracting state nodes"); final Visitor visitor = new Visitor(); tree_.root().accept(visitor);/*from w ww . j a v a2s . c om*/ // This extracts only the level-1 nodes. // TODO: Do this somewhere better. final HashMap<List<ActionNode<S, A>>, List<StateNode<S, A>>> tx = new HashMap<List<ActionNode<S, A>>, List<StateNode<S, A>>>(); final ArrayList<StateNode<S, A>> depth_1 = new ArrayList<StateNode<S, A>>(); for (final Map.Entry<List<ActionNode<S, A>>, List<StateNode<S, A>>> e : visitor.xs.entrySet()) { if (e.getKey() == null || e.getKey().size() != 1) { continue; } else { depth_1.addAll(e.getValue()); } } tx.put(null, depth_1); final Comparator<Instance> weight_comp = new Comparator<Instance>() { @Override public int compare(final Instance a, final Instance b) { return (int) Math.signum(a.weight() - b.weight()); } }; final int max_cap = max_instances_ + 1; final PriorityQueue<Instance> positive = new PriorityQueue<Instance>(max_cap, weight_comp); final PriorityQueue<Instance> negative = new PriorityQueue<Instance>(max_cap, weight_comp); System.out.println("*** Building Instances"); for (final Map.Entry<List<ActionNode<S, A>>, List<StateNode<S, A>>> e : tx.entrySet()) { System.out.println("***** key = " + e.getKey() + ", value.size() = " + e.getValue().size()); final String name = (e.getKey() != null ? e.getKey().toString() : "null"); final List<StateNode<S, A>> values = e.getValue(); final int[] num_instances = { 0, 0 }; int count = 0; for (int i = 0; i < values.size(); ++i) { for (int j = i + 1; j < values.size(); ++j) { if (count++ % 100 == 0) { System.out.println("***** instance " + (count - 1)); } final StateNode<S, A> s_i = values.get(i); final StateNode<S, A> s_j = values.get(j); if (s_i.n() < min_samples_ || s_j.n() < min_samples_) { System.out.println("! 
skipping under-sampled state pair"); continue; } final double[] phi_i = s_i.token.phi(); final double[] phi_j = s_j.token.phi(); assert (phi_i.length == phi_j.length); if (phi_i.length != attributes_.size() - 1) { System.out.println("! phi_i.length = " + phi_i.length); System.out.println("! attributes_.size() = " + attributes_.size()); } assert (phi_i.length == attributes_.size() - 1); // Feature vector is absolute difference of the two state // feature vectors. final double[] phi_labeled = new double[phi_i.length + 1]; for (int k = 0; k < phi_i.length; ++k) { phi_labeled[k] = Math.abs(phi_i[k] - phi_j[k]); } final Tuple2<Integer, Double> labeled = label(e.getKey(), player_, s_i, s_j); final int label = labeled._1; final double weight = labeled._2; final String label_string = Integer.toString(label); phi_labeled[label_index] = label; //attributes.get( label_index ).indexOfValue( label_string ); num_instances[label] += 1; final Instance instance = new DenseInstance(weight, phi_labeled); if (label == 0) { negative.add(instance); if (negative.size() >= max_cap) { negative.poll(); } } else { positive.add(instance); if (positive.size() >= max_cap) { positive.poll(); } } } // for j } // for i System.out.println("num_instances = " + Arrays.toString(num_instances)); final Instances x = new Instances(name, attributes_, negative.size() + positive.size()); x.setClassIndex(label_index); x.addAll(negative); x.addAll(positive); xs_.put(e.getKey(), x); } }
From source file:en_deep.mlprocess.manipulation.featmodif.FeatureModifierFilter.java
License:Open Source License
/** * Convert a single instance over if the class is nominal. The converted * instance is added to the end of the output queue. * * @param instance the instance to convert *//* www . j a va 2 s . co m*/ private void convertInstance(Instance instance) { double[] vals = new double[outputFormatPeek().numAttributes()]; String[] stringVals = new String[vals.length]; int attSoFar = 0; for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = instance.attribute(j); if (!m_Columns.isInRange(j)) { vals[attSoFar] = instance.value(j); attSoFar++; } else { // store new string values, make double values "missing" for now (if some string // values are missing, the double values will remain missing) if (instance.value(0) == 12 && instance.value(1) == 9 && att.name().equals("sempos")) { attSoFar = attSoFar; } attSoFar += getAttributeOutputValue(att, instance.value(j), vals, stringVals, attSoFar); } } Instance inst = null; if (instance instanceof SparseInstance) { inst = new SparseInstance(instance.weight(), vals); } else { inst = new DenseInstance(instance.weight(), vals); } inst.setDataset(getOutputFormat()); copyValues(inst, false, instance.dataset(), getOutputFormat()); // add new string values to the output data set and to the instance for (int i = 0; i < stringVals.length; ++i) { if (stringVals[i] != null) { vals[i] = inst.dataset().attribute(i).addStringValue(stringVals[i]); } } inst.replaceMissingValues(vals); inst.setDataset(getOutputFormat()); push(inst); }
From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java
License:Open Source License
/** * Convert a single instance over if the class is nominal. The converted * instance is added to the end of the output queue. * * @param instance the instance to convert *///w w w . jav a2 s .co m private void convertInstance(Instance instance) { // create a copy of the input instance Instance inst = null; if (instance instanceof SparseInstance) { inst = new SparseInstance(instance.weight(), instance.toDoubleArray()); } else { inst = new DenseInstance(instance.weight(), instance.toDoubleArray()); } // copy the string values from this instance as well (only the existing ones) inst.setDataset(getOutputFormat()); copyValues(inst, false, instance.dataset(), getOutputFormat()); // beware of weird behavior of this function (see source)!! inst.setDataset(getOutputFormat()); // find the missing values to be filled + the double values for the new "missing" label and store it double[] vals = instance.toDoubleArray(); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = instance.attribute(j); if (m_Columns.isInRange(j) && instance.isMissing(j)) { // find the "missing" value in the output nominal attribute if (att.isNominal()) { vals[j] = inst.dataset().attribute(j).indexOfValue(m_ReplVal); } // add a string value for the new "missing" label else if (att.isString()) { vals[j] = inst.dataset().attribute(j).addStringValue(m_ReplVal); } } } // fill in the missing values found inst.replaceMissingValues(vals); push(inst); }
From source file:en_deep.mlprocess.manipulation.SetAwareNominalToBinary.java
License:Open Source License
/**
 * Converts one instance to the output format and appends it to the output
 * queue.
 *
 * <p>An attribute is expanded through {@code setConvertedAttribute} only when it
 * is nominal, is not the class attribute, lies inside {@code m_Columns}, and
 * either has more than two values or {@code m_TransformAll} is set. Every other
 * attribute value is copied as is.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {
    final double[] converted = new double[outputFormatPeek().numAttributes()];
    int outPos = 0;

    for (int inPos = 0; inPos < getInputFormat().numAttributes(); inPos++) {
        final Attribute att = getInputFormat().attribute(inPos);
        final boolean candidate = att.isNominal()
                && inPos != getInputFormat().classIndex()
                && m_Columns.isInRange(inPos);
        final boolean expand = candidate && (att.numValues() > 2 || m_TransformAll);

        if (expand) {
            // the helper reports how many output slots it filled
            outPos += setConvertedAttribute(att, instance.value(inPos), converted, outPos);
        } else {
            converted[outPos++] = instance.value(inPos);
        }
    }

    // Keep the sparse/dense representation of the input.
    final Instance out;
    if (instance instanceof SparseInstance) {
        out = new SparseInstance(instance.weight(), converted);
    } else {
        out = new DenseInstance(instance.weight(), converted);
    }

    out.setDataset(getOutputFormat());
    copyValues(out, false, instance.dataset(), getOutputFormat());
    out.setDataset(getOutputFormat());
    push(out);
}
From source file:etc.aloe.filters.AbstractRegexFilter.java
License:Open Source License
@Override protected Instance process(Instance instance) throws Exception { if (stringAttributeIndex < 0) { throw new IllegalStateException("String attribute not set"); }/*from w ww . j av a 2 s . c o m*/ String stringValue = instance.stringValue(stringAttributeIndex); NamedRegex[] regexFeatures = getRegexFeatures(); int numOldValues = instance.numAttributes(); int numNewFeatures = regexFeatures.length; if (countRegexLengths) { numNewFeatures = regexFeatures.length * 2; } double[] newValues = new double[numOldValues + numNewFeatures]; // Copy all attributes from input to output for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (getInputFormat().attribute(i).type() != Attribute.STRING) { // Add simple nominal and numeric attributes directly if (instance.value(i) != 0.0) { newValues[i] = instance.value(i); } } else { if (instance.isMissing(i)) { newValues[i] = Utils.missingValue(); } else { // If this is a string attribute, we have to first add // this value to the range of possible values, then add // its new internal index. if (outputFormatPeek().attribute(i).numValues() == 0) { // Note that the first string value in a // SparseInstance doesn't get printed. outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug"); } int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i)); newValues[i] = newIndex; } } } for (int i = 0; i < regexFeatures.length; i++) { Pattern pattern = regexFeatures[i].getPattern(); Matcher matches = pattern.matcher(stringValue); int count = 0; int maxLength = 0; while (matches.find()) { count++; int len = matches.group().length(); if (len > maxLength) { maxLength = len; } } int index = numOldValues + i; if (countRegexLengths) { index = numOldValues + 2 * i; } newValues[index] = count; if (countRegexLengths) { newValues[numOldValues + 2 * i + 1] = maxLength; } } Instance result = new SparseInstance(instance.weight(), newValues); return result; }
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
/** * Converts the instance w/o normalization. * * @param instance the instance to convert * * @param ArrayList<Instance> the list of instances * @return the document length/*w ww . j a v a2s . c o m*/ */ private double convertInstancewoDocNorm(Instance instance, ArrayList<Instance> converted) { if (stringAttributeIndex < 0) { throw new IllegalStateException("String attribute index not valid"); } int numOldValues = instance.numAttributes(); double[] newValues = new double[numOldValues + m_selectedTerms.size()]; // Copy all attributes from input to output for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (getInputFormat().attribute(i).type() != Attribute.STRING) { // Add simple nominal and numeric attributes directly if (instance.value(i) != 0.0) { newValues[i] = instance.value(i); } } else { if (instance.isMissing(i)) { newValues[i] = Utils.missingValue(); } else { // If this is a string attribute, we have to first add // this value to the range of possible values, then add // its new internal index. if (outputFormatPeek().attribute(i).numValues() == 0) { // Note that the first string value in a // SparseInstance doesn't get printed. 
outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug"); } int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i)); newValues[i] = newIndex; } } } String stringValue = instance.stringValue(stringAttributeIndex); double docLength = 0; HashMap<String, Integer> termMatches = m_selectedTermsTrie.countNonoverlappingMatches(stringValue); for (Map.Entry<String, Integer> entry : termMatches.entrySet()) { String term = entry.getKey(); int termIdx = m_selectedTermIndices.get(term); double matches = entry.getValue(); if (!m_OutputCounts && matches > 0) { matches = 1; } if (matches > 0) { if (m_TFTransform == true) { matches = Math.log(matches + 1); } if (m_IDFTransform == true) { matches = matches * Math.log(m_NumInstances / (double) m_DocsCounts[termIdx]); } newValues[numOldValues + termIdx] = matches; docLength += matches * matches; } } Instance result = new SparseInstance(instance.weight(), newValues); converted.add(result); return Math.sqrt(docLength); }
From source file:etc.aloe.filters.WordFeaturesExtractor.java
License:Open Source License
@Override protected Instance process(Instance instance) throws Exception { if (selectedAttributeIndex < 0) { throw new IllegalStateException("String attribute not set"); }/*from www . ja v a 2 s . co m*/ int numOldValues = instance.numAttributes(); int numNewFeatures = unigrams.size() + bigrams.size(); double[] newValues = new double[numOldValues + numNewFeatures]; // Copy all attributes from input to output for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (getInputFormat().attribute(i).type() != Attribute.STRING) { // Add simple nominal and numeric attributes directly if (instance.value(i) != 0.0) { newValues[i] = instance.value(i); } } else { if (instance.isMissing(i)) { newValues[i] = Utils.missingValue(); } else { // If this is a string attribute, we have to first add // this value to the range of possible values, then add // its new internal index. if (outputFormatPeek().attribute(i).numValues() == 0) { // Note that the first string value in a // SparseInstance doesn't get printed. 
outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug"); } int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i)); newValues[i] = newIndex; } } } String stringValue = instance.stringValue(selectedAttributeIndex); if (instance.isMissing(selectedAttributeIndex) == false) { List<String> words = tokenizeDocument(instance); Set<String> wordSet = new HashSet<String>(words); for (int i = 0; i < unigrams.size(); i++) { String unigram = unigrams.get(i); int count = 0; if (wordSet.contains(unigram)) { //Count the times the word is in the document for (int w = 0; w < words.size(); w++) { if (words.get(w).equals(unigram)) { count += 1; } } } int featureIndex = numOldValues + i; newValues[featureIndex] = count; } for (int i = 0; i < bigrams.size(); i++) { Bigram bigram = bigrams.get(i); int count = bigram.getTimesInDocument(words); int featureIndex = numOldValues + unigrams.size() + i; newValues[featureIndex] = count; } } Instance result = new SparseInstance(instance.weight(), newValues); return result; }