List of usage examples for weka.core Instances setClassIndex
public void setClassIndex(int classIndex)
From source file:meka.core.PSUtils.java
License:Open Source License
/** * Transform instances into a multi-class representation. * @param D original dataset/*from ww w . j a v a 2 s . c om*/ * @param L number of labels in the original dataset * @param cname class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods) * @param p pruning value * @param n restoration value * @return transformed dataset */ public static Instances PSTransformation(Instances D, int L, String cname, int p, int n) { D = new Instances(D); // Gather combinations HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L); // Prune combinations if (p > 0) MLUtils.pruneCountHashMap(distinctCombinations, p); // Check there are > 2 if (distinctCombinations.size() <= 1 && p > 0) { // ... or try again if not ... System.err.println("[Warning] You did too much pruning, setting P = P-1"); return PSTransformation(D, L, cname, p - 1, n); } // Create class attribute ArrayList<String> ClassValues = new ArrayList<String>(); for (LabelSet y : distinctCombinations.keySet()) ClassValues.add(y.toString()); Attribute C = new Attribute(cname, ClassValues); // Insert new special attribute (which has all possible combinations of labels) D.insertAttributeAt(C, L); D.setClassIndex(L); //Add class values int N = D.numInstances(); for (int i = 0; i < N; i++) { Instance x = D.instance(i); LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L)); String y_string = y.toString(); // add it if (ClassValues.contains(y_string)) //if its class value exists x.setClassValue(y_string); // decomp else if (n > 0) { //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n); LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n); //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations); if (d_subsets.length > 0) { // fast x.setClassValue(d_subsets[0].toString()); // additional if (d_subsets.length > 1) { for (int s_i = 1; s_i < d_subsets.length; s_i++) { Instance x_ = (Instance) (x).copy(); x_.setClassValue(d_subsets[s_i].toString()); D.add(x_); } } } else { x.setClassMissing(); } } } // remove with missing class D.deleteWithMissingClass(); try { D = F.removeLabels(D, L); } catch (Exception e) { // should never happen } D.setClassIndex(0); return D; }
From source file:meka.core.PSUtils.java
License:Open Source License
/** * Transform instances into a multi-class representation. * @param D original dataset//from w w w . j ava2 s . c om * @param L number of labels in that dataset * @param cname class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods) * @param p pruning value * @param n restoration value * @return transformed dataset */ public static Instances SLTransformation(Instances D, int L, String cname, int p, int n) { D = new Instances(D); // Gather combinations HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L); // Prune combinations if (p > 0) MLUtils.pruneCountHashMap(distinctCombinations, p); // Check there are > 2 if (distinctCombinations.size() <= 1 && p > 0) { // ... or try again if not ... System.err.println("[Warning] You did too much pruning, setting P = P-1"); return PSTransformation(D, L, cname, p - 1, n); } // Create class attribute ArrayList<String> ClassValues = new ArrayList<String>(); for (LabelSet y : distinctCombinations.keySet()) ClassValues.add(y.toString()); Attribute C = new Attribute(cname, ClassValues); // Insert new special attribute (which has all possible combinations of labels) D.insertAttributeAt(C, L); D.setClassIndex(L); //Add class values int N = D.numInstances(); for (int i = 0; i < N; i++) { Instance x = D.instance(i); LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L)); String y_string = y.toString(); // add it if (ClassValues.contains(y_string)) //if its class value exists x.setClassValue(y_string); // decomp else if (n > 0) { //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n); LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n); //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations); if (d_subsets.length > 0) { // fast x.setClassValue(d_subsets[0].toString()); // additional if (d_subsets.length > 1) { for (int s_i = 1; s_i < d_subsets.length; s_i++) { Instance x_ = (Instance) (x).copy(); x_.setClassValue(d_subsets[s_i].toString()); D.add(x_); } } } else { x.setClassMissing(); } } } // remove with missing class D.deleteWithMissingClass(); try { D = F.removeLabels(D, L); } catch (Exception e) { // should never happen } D.setClassIndex(0); return D; }
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/** * Make Partition Dataset - out of dataset D, on indices part[]. * @param D regular multi-label dataset (of L = classIndex() labels) * @param part list of indices we want to make into a PS dataset. * @param P see {@link PSUtils}//ww w. java 2 s.c om * @param N see {@link PSUtils} * @return Dataset with 1 multi-valued class label, representing the combinations of part[]. */ public static Instances makePartitionDataset(Instances D, int part[], int P, int N) throws Exception { int L = D.classIndex(); Instances D_ = new Instances(D); // strip out irrelevant attributes D_.setClassIndex(-1); D_ = F.keepLabels(D, L, part); D_.setClassIndex(part.length); // make LC transformation D_ = PSUtils.PSTransformation(D_, P, N); return D_; }
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/** * Super Label Transformation - transform dataset D into a dataset with <code>k</code> multi-class target attributes. * Use the NSR/PS-style pruning and recomposition, according to partition 'indices', and pruning values 'p' and 'n'. * @see PSUtils.PSTransformation/* www .j a v a2 s. c o m*/ * @param indices m by k: m super variables, each relating to k original variables * @param D either multi-label or multi-target dataset * @param p pruning value * @param n subset relpacement value * @return a multi-target dataset */ public static Instances SLTransformation(Instances D, int indices[][], int p, int n) { int L = D.classIndex(); int K = indices.length; ArrayList<String> values[] = new ArrayList[K]; HashMap<String, Integer> counts[] = new HashMap[K]; // create D_ Instances D_ = new Instances(D); // clear D_ // F.removeLabels(D_,L); for (int j = 0; j < L; j++) { D_.deleteAttributeAt(0); } // create atts for (int j = 0; j < K; j++) { int att[] = indices[j]; //int values[] = new int[2]; //getValues(indices,D,p); counts[j] = getCounts(D, att, p); Set<String> vals = counts[j].keySet(); //getValues(D,att,p); values[j] = new ArrayList(vals); D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j); } // copy over values ArrayList<Integer> deleteList = new ArrayList<Integer>(); for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); for (int j = 0; j < K; j++) { String y = encodeValue(x, indices[j]); try { D_.instance(i).setValue(j, y); // y = } catch (Exception e) { // value not allowed deleteList.add(i); // mark it for deletion String y_close[] = getTopNSubsets(y, counts[j], n); // get N subsets for (int m = 0; m < y_close.length; m++) { //System.out.println("add "+y_close[m]+" "+counts[j]); Instance x_copy = (Instance) D_.instance(i).copy(); x_copy.setValue(j, y_close[m]); x_copy.setWeight(1.0 / y_close.length); D_.add(x_copy); } } } } // clean up Collections.sort(deleteList, Collections.reverseOrder()); //System.out.println("Deleting "+deleteList.size()+" defunct instances."); for (int i : deleteList) { D_.delete(i); } // set class D_.setClassIndex(K); // done! return D_; }
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/** * Merge Labels - Make a new 'D', with labels made into superlabels, according to partition 'indices', and pruning values 'p' and 'n'. * @param D assume attributes in D labeled by original index * @return Instances with attributes at j and k moved to position L as (j,k), with classIndex = L-1 *//*w ww . ja v a 2s.co m*/ public static Instances mergeLabels(Instances D, int indices[][], int p, int n) { int L = D.classIndex(); int K = indices.length; ArrayList<String> values[] = new ArrayList[K]; HashMap<String, Integer> counts[] = new HashMap[K]; // create D_ Instances D_ = new Instances(D); // clear D_ for (int j = 0; j < L; j++) { D_.deleteAttributeAt(0); } // create atts for (int j = 0; j < K; j++) { int att[] = indices[j]; //int values[] = new int[2]; //getValues(indices,D,p); counts[j] = getCounts(D, att, p); Set<String> vals = counts[j].keySet(); //getValues(D,att,p); values[j] = new ArrayList(vals); D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j); } // copy over values ArrayList<Integer> deleteList = new ArrayList<Integer>(); for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); for (int j = 0; j < K; j++) { String y = encodeValue(x, indices[j]); try { D_.instance(i).setValue(j, y); // y = } catch (Exception e) { // value not allowed deleteList.add(i); // mark it for deletion String y_close[] = NSR.getTopNSubsets(y, counts[j], n); // get N subsets for (int m = 0; m < y_close.length; m++) { //System.out.println("add "+y_close[m]+" "+counts[j]); Instance x_copy = (Instance) D_.instance(i).copy(); x_copy.setValue(j, y_close[m]); x_copy.setWeight(1.0 / y_close.length); D_.add(x_copy); } } } } // clean up Collections.sort(deleteList, Collections.reverseOrder()); //System.out.println("Deleting "+deleteList.size()+" defunct instances."); for (int i : deleteList) { D_.delete(i); } // set class D_.setClassIndex(K); // done! D = null; return D_; }
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/** * Merge Labels./* w w w .java 2 s . c om*/ * * @param j index 1 (assume that <code>j < k</code>) * @param k index 2 (assume that <code>j < k</code>) * @param D iInstances, with attributes in labeled by original index * @return Instaces with attributes at j and k moved to position L as (j,k), with classIndex = L-1 */ public static Instances mergeLabels(Instances D, int j, int k, int p) { int L = D.classIndex(); HashMap<String, Integer> count = new HashMap<String, Integer>(); Set<String> values = new HashSet<String>(); for (int i = 0; i < D.numInstances(); i++) { String v = encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k)); String w = "" + (int) D.instance(i).value(j) + (int) D.instance(i).value(k); //System.out.println("w = "+w); count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1); values.add(encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k))); } //System.out.println("("+j+","+k+")"+values); System.out.print("pruned from " + count.size() + " to "); MLUtils.pruneCountHashMap(count, p); String y_max = (String) MLUtils.argmax(count); // @todo won't need this in the future System.out.println("" + count.size() + " with p = " + p); System.out.println("" + count); values = count.keySet(); // Create and insert the new attribute D.insertAttributeAt( new Attribute(encodeClass(D.attribute(j).name(), D.attribute(k).name()), new ArrayList(values)), L); // Set values for the new attribute for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); String y_jk = encodeValue(x.stringValue(j), x.stringValue(k)); try { x.setValue(L, y_jk); // y_jk = } catch (Exception e) { //x.setMissing(L); //D.delete(i); //i--; String y_close[] = getNeighbours(y_jk, count, 1); // A+B+NEG, A+C+NEG //System.out.println("OK, that value ("+y_jk+") didn't exist ... set the closests ones ...: "+Arrays.toString(y_close)); int max_c = 0; for (String y_ : y_close) { int c = count.get(y_); if (c > max_c) { max_c = c; y_max = y_; } } //System.out.println("we actually found "+Arrays.toString(y_close)+" but will only set one for now (the one with the highest count) : "+y_max+" ..."); x.setValue(L, y_max); // ok, that value didn't exist, set the maximum one (@TODO: set the nearest one) } } // Delete separate attributes D.deleteAttributeAt(k > j ? k : j); D.deleteAttributeAt(k > j ? j : k); // Set class index D.setClassIndex(L - 1); return D; }
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
public static void main(String[] argv) { try {/*from w w w.j a v a 2 s . co m*/ String fname = Utils.getOption('i', argv); Instances D = new Instances(new BufferedReader(new FileReader(fname))); SuperNodeFilter f = new SuperNodeFilter(); int c = Integer.parseInt(Utils.getOption('c', argv)); D.setClassIndex(c); System.out.println("" + f.process(D)); //runFilter(new SuperNodeFilter(), argv); } catch (Exception e) { System.err.println(""); e.printStackTrace(); //System.exit(1); } }
From source file:meka.filters.unsupervised.attribute.MekaClassAttributes.java
License:Open Source License
/** * Determines the output format based on the input format and returns * this. In case the output format cannot be returned immediately, i.e., * hasImmediateOutputFormat() returns false, then this method will called * from batchFinished() after the call of preprocess(Instances), in which, * e.g., statistics for the actual processing step can be gathered. * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong *///from ww w . j a v a 2 s. c o m protected Instances determineOutputFormat(Instances inputFormat) throws Exception { int i; int[] indices; StringBuilder order; Instances output; m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1); order = new StringBuilder(); indices = m_AttributeIndices.getSelection(); if (indices.length == 0) throw new WekaException("No attributes defined as class attributes!"); for (i = 0; i < indices.length; i++) { if (i > 0) order.append(","); order.append("" + (indices[i] + 1)); } for (i = 0; i < inputFormat.numAttributes(); i++) { if (m_AttributeIndices.isInRange(i)) continue; order.append(","); order.append("" + (i + 1)); } m_Reorder.setAttributeIndices(order.toString()); m_Reorder.setInputFormat(inputFormat); output = m_Reorder.getOutputFormat(); output.setClassIndex(indices.length); output.setRelationName("-C " + indices.length); return output; }
From source file:meka.gui.dataviewer.DataViewerMainPanel.java
License:Open Source License
/** * displays some properties of the instances *//* w ww . ja v a2 s . co m*/ public void showProperties() { DataPanel panel; ListSelectorDialog dialog; Vector<String> props; Instances inst; panel = getCurrentPanel(); if (panel == null) { return; } inst = panel.getInstances(); if (inst == null) { return; } if (inst.classIndex() < 0) { inst.setClassIndex(inst.numAttributes() - 1); } // get some data props = new Vector<String>(); props.add("Filename: " + panel.getFilename()); props.add("Relation name: " + inst.relationName()); props.add("# of instances: " + inst.numInstances()); props.add("# of attributes: " + inst.numAttributes()); props.add("Class attribute: " + inst.classAttribute().name()); props.add("# of class labels: " + inst.numClasses()); dialog = new ListSelectorDialog(getParentFrame(), new JList(props)); dialog.showDialog(); }
From source file:meka.gui.explorer.classify.EditTestData.java
License:Open Source License
/** * Returns the action lister to use in the menu. * * @return the listener//w w w . j a v a2 s.com */ public ActionListener getActionListener(final ClassifyTab owner) { return new ActionListener() { @Override public void actionPerformed(ActionEvent e) { ViewerDialog dialog; int result; Instances copy; Instances newInstances; copy = new Instances(owner.getTestData()); dialog = new ViewerDialog(null); dialog.setSize(800, 600); dialog.setLocationRelativeTo(owner); result = dialog.showDialog(copy); if (result == ViewerDialog.APPROVE_OPTION) { // if class was not set before, reset it again after use of filter newInstances = dialog.getInstances(); if (owner.getTestData().classIndex() < 0) newInstances.setClassIndex(-1); owner.setTestData(newInstances); } } }; }