List of usage examples for weka.core Instances insertAttributeAt
public void insertAttributeAt(Attribute att, int position)
From source file:meka.classifiers.multitarget.NSR.java
License:Open Source License
public Instances convertInstances(Instances D, int L) throws Exception { //Gather combinations HashMap<String, Integer> distinctCombinations = MLUtils.classCombinationCounts(D); if (getDebug()) System.out.println("Found " + distinctCombinations.size() + " unique combinations"); //Prune combinations MLUtils.pruneCountHashMap(distinctCombinations, m_P); if (getDebug()) System.out.println("Pruned to " + distinctCombinations.size() + " with P=" + m_P); // Remove all class attributes Instances D_ = MLUtils.deleteAttributesAt(new Instances(D), MLUtils.gen_indices(L)); // Add a new class attribute D_.insertAttributeAt(new Attribute("CLASS", new ArrayList(distinctCombinations.keySet())), 0); // create the class attribute D_.setClassIndex(0);/*w ww. ja v a2 s . c om*/ //Add class values for (int i = 0; i < D.numInstances(); i++) { String y = MLUtils.encodeValue(MLUtils.toIntArray(D.instance(i), L)); // add it if (distinctCombinations.containsKey(y)) //if its class value exists D_.instance(i).setClassValue(y); // decomp else if (m_N > 0) { String d_subsets[] = SuperLabelUtils.getTopNSubsets(y, distinctCombinations, m_N); for (String s : d_subsets) { int w = distinctCombinations.get(s); Instance copy = (Instance) (D_.instance(i)).copy(); copy.setClassValue(s); copy.setWeight(1.0 / d_subsets.length); D_.add(copy); } } } // remove with missing class D_.deleteWithMissingClass(); // keep the header of new dataset for classification m_InstancesTemplate = new Instances(D_, 0); if (getDebug()) System.out.println("" + D_); return D_; }
From source file:meka.core.MLUtils.java
License:Open Source License
/** * InsertZintoD - Insert data Z[][] to Instances D (e.g., as labels). * NOTE: Assumes binary labels!/* ww w . j a v a2s. c om*/ * @see #addZtoD(Instances, double[][], int) */ private static Instances insertZintoD(Instances D, double Z[][]) { int L = Z[0].length; // add attributes for (int j = 0; j < L; j++) { D.insertAttributeAt(new Attribute("c" + j, Arrays.asList(new String[] { "0", "1" })), j); } // add values Z[0]...Z[N] to D // (note that if D.numInstances() < Z.length, only some are added) for (int j = 0; j < L; j++) { for (int i = 0; i < D.numInstances(); i++) { D.instance(i).setValue(j, Z[i][j] > 0.5 ? 1.0 : 0.0); } } D.setClassIndex(L); return D; }
From source file:meka.core.MLUtils.java
License:Open Source License
/** * AddZtoD - Add attribute space Z[N][H] (N rows of H columns) to Instances D, which should have N rows also. * @param D dataset (of N instances) * @param Z attribute space (of N rows, H columns) * @param L column to add Z from in D *//*from w w w . j av a 2 s .c om*/ private static Instances addZtoD(Instances D, double Z[][], int L) { int H = Z[0].length; int N = D.numInstances(); // add attributes for (int a = 0; a < H; a++) { D.insertAttributeAt(new Attribute("A" + a), L + a); } // add values Z[0]...Z[N] to D for (int a = 0; a < H; a++) { for (int i = 0; i < N; i++) { D.instance(i).setValue(L + a, Z[i][a]); } } D.setClassIndex(L); return D; }
From source file:meka.core.PSUtils.java
License:Open Source License
/** * Transform instances into a multi-class representation. * @param D original dataset// w ww . j ava 2 s. co m * @param L number of labels in the original dataset * @param cname class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods) * @param p pruning value * @param n restoration value * @return transformed dataset */ public static Instances PSTransformation(Instances D, int L, String cname, int p, int n) { D = new Instances(D); // Gather combinations HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L); // Prune combinations if (p > 0) MLUtils.pruneCountHashMap(distinctCombinations, p); // Check there are > 2 if (distinctCombinations.size() <= 1 && p > 0) { // ... or try again if not ... System.err.println("[Warning] You did too much pruning, setting P = P-1"); return PSTransformation(D, L, cname, p - 1, n); } // Create class attribute ArrayList<String> ClassValues = new ArrayList<String>(); for (LabelSet y : distinctCombinations.keySet()) ClassValues.add(y.toString()); Attribute C = new Attribute(cname, ClassValues); // Insert new special attribute (which has all possible combinations of labels) D.insertAttributeAt(C, L); D.setClassIndex(L); //Add class values int N = D.numInstances(); for (int i = 0; i < N; i++) { Instance x = D.instance(i); LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L)); String y_string = y.toString(); // add it if (ClassValues.contains(y_string)) //if its class value exists x.setClassValue(y_string); // decomp else if (n > 0) { //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n); LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n); //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations); if (d_subsets.length > 0) { // fast x.setClassValue(d_subsets[0].toString()); // additional if (d_subsets.length > 1) { for (int s_i = 1; s_i < d_subsets.length; s_i++) { Instance x_ = (Instance) (x).copy(); x_.setClassValue(d_subsets[s_i].toString()); D.add(x_); } } } else { x.setClassMissing(); } } } // remove with missing class D.deleteWithMissingClass(); try { D = F.removeLabels(D, L); } catch (Exception e) { // should never happen } D.setClassIndex(0); return D; }
From source file:meka.core.PSUtils.java
License:Open Source License
/** * Transform instances into a multi-class representation. * @param D original dataset//from w w w . ja v a 2 s. c om * @param L number of labels in that dataset * @param cname class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods) * @param p pruning value * @param n restoration value * @return transformed dataset */ public static Instances SLTransformation(Instances D, int L, String cname, int p, int n) { D = new Instances(D); // Gather combinations HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L); // Prune combinations if (p > 0) MLUtils.pruneCountHashMap(distinctCombinations, p); // Check there are > 2 if (distinctCombinations.size() <= 1 && p > 0) { // ... or try again if not ... System.err.println("[Warning] You did too much pruning, setting P = P-1"); return PSTransformation(D, L, cname, p - 1, n); } // Create class attribute ArrayList<String> ClassValues = new ArrayList<String>(); for (LabelSet y : distinctCombinations.keySet()) ClassValues.add(y.toString()); Attribute C = new Attribute(cname, ClassValues); // Insert new special attribute (which has all possible combinations of labels) D.insertAttributeAt(C, L); D.setClassIndex(L); //Add class values int N = D.numInstances(); for (int i = 0; i < N; i++) { Instance x = D.instance(i); LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L)); String y_string = y.toString(); // add it if (ClassValues.contains(y_string)) //if its class value exists x.setClassValue(y_string); // decomp else if (n > 0) { //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n); LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n); //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations); if (d_subsets.length > 0) { // fast x.setClassValue(d_subsets[0].toString()); // additional if (d_subsets.length > 1) { for (int s_i = 1; s_i < d_subsets.length; s_i++) { Instance x_ = (Instance) (x).copy(); x_.setClassValue(d_subsets[s_i].toString()); D.add(x_); } } } else { x.setClassMissing(); } } } // remove with missing class D.deleteWithMissingClass(); try { D = F.removeLabels(D, L); } catch (Exception e) { // should never happen } D.setClassIndex(0); return D; }
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/** * Super Label Transformation - transform dataset D into a dataset with <code>k</code> multi-class target attributes. * Use the NSR/PS-style pruning and recomposition, according to partition 'indices', and pruning values 'p' and 'n'. * @see PSUtils.PSTransformation// w w w . j a v a 2s. com * @param indices m by k: m super variables, each relating to k original variables * @param D either multi-label or multi-target dataset * @param p pruning value * @param n subset relpacement value * @return a multi-target dataset */ public static Instances SLTransformation(Instances D, int indices[][], int p, int n) { int L = D.classIndex(); int K = indices.length; ArrayList<String> values[] = new ArrayList[K]; HashMap<String, Integer> counts[] = new HashMap[K]; // create D_ Instances D_ = new Instances(D); // clear D_ // F.removeLabels(D_,L); for (int j = 0; j < L; j++) { D_.deleteAttributeAt(0); } // create atts for (int j = 0; j < K; j++) { int att[] = indices[j]; //int values[] = new int[2]; //getValues(indices,D,p); counts[j] = getCounts(D, att, p); Set<String> vals = counts[j].keySet(); //getValues(D,att,p); values[j] = new ArrayList(vals); D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j); } // copy over values ArrayList<Integer> deleteList = new ArrayList<Integer>(); for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); for (int j = 0; j < K; j++) { String y = encodeValue(x, indices[j]); try { D_.instance(i).setValue(j, y); // y = } catch (Exception e) { // value not allowed deleteList.add(i); // mark it for deletion String y_close[] = getTopNSubsets(y, counts[j], n); // get N subsets for (int m = 0; m < y_close.length; m++) { //System.out.println("add "+y_close[m]+" "+counts[j]); Instance x_copy = (Instance) D_.instance(i).copy(); x_copy.setValue(j, y_close[m]); x_copy.setWeight(1.0 / y_close.length); D_.add(x_copy); } } } } // clean up Collections.sort(deleteList, Collections.reverseOrder()); //System.out.println("Deleting "+deleteList.size()+" defunct instances."); for (int i : deleteList) { D_.delete(i); } // set class D_.setClassIndex(K); // done! return D_; }
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/** * Merge Labels - Make a new 'D', with labels made into superlabels, according to partition 'indices', and pruning values 'p' and 'n'. * @param D assume attributes in D labeled by original index * @return Instances with attributes at j and k moved to position L as (j,k), with classIndex = L-1 *///from www. j a v a2 s . c om public static Instances mergeLabels(Instances D, int indices[][], int p, int n) { int L = D.classIndex(); int K = indices.length; ArrayList<String> values[] = new ArrayList[K]; HashMap<String, Integer> counts[] = new HashMap[K]; // create D_ Instances D_ = new Instances(D); // clear D_ for (int j = 0; j < L; j++) { D_.deleteAttributeAt(0); } // create atts for (int j = 0; j < K; j++) { int att[] = indices[j]; //int values[] = new int[2]; //getValues(indices,D,p); counts[j] = getCounts(D, att, p); Set<String> vals = counts[j].keySet(); //getValues(D,att,p); values[j] = new ArrayList(vals); D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j); } // copy over values ArrayList<Integer> deleteList = new ArrayList<Integer>(); for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); for (int j = 0; j < K; j++) { String y = encodeValue(x, indices[j]); try { D_.instance(i).setValue(j, y); // y = } catch (Exception e) { // value not allowed deleteList.add(i); // mark it for deletion String y_close[] = NSR.getTopNSubsets(y, counts[j], n); // get N subsets for (int m = 0; m < y_close.length; m++) { //System.out.println("add "+y_close[m]+" "+counts[j]); Instance x_copy = (Instance) D_.instance(i).copy(); x_copy.setValue(j, y_close[m]); x_copy.setWeight(1.0 / y_close.length); D_.add(x_copy); } } } } // clean up Collections.sort(deleteList, Collections.reverseOrder()); //System.out.println("Deleting "+deleteList.size()+" defunct instances."); for (int i : deleteList) { D_.delete(i); } // set class D_.setClassIndex(K); // done! D = null; return D_; }
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/** * Merge Labels./*from w w w. ja v a2s. co m*/ * * @param j index 1 (assume that <code>j < k</code>) * @param k index 2 (assume that <code>j < k</code>) * @param D iInstances, with attributes in labeled by original index * @return Instaces with attributes at j and k moved to position L as (j,k), with classIndex = L-1 */ public static Instances mergeLabels(Instances D, int j, int k, int p) { int L = D.classIndex(); HashMap<String, Integer> count = new HashMap<String, Integer>(); Set<String> values = new HashSet<String>(); for (int i = 0; i < D.numInstances(); i++) { String v = encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k)); String w = "" + (int) D.instance(i).value(j) + (int) D.instance(i).value(k); //System.out.println("w = "+w); count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1); values.add(encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k))); } //System.out.println("("+j+","+k+")"+values); System.out.print("pruned from " + count.size() + " to "); MLUtils.pruneCountHashMap(count, p); String y_max = (String) MLUtils.argmax(count); // @todo won't need this in the future System.out.println("" + count.size() + " with p = " + p); System.out.println("" + count); values = count.keySet(); // Create and insert the new attribute D.insertAttributeAt( new Attribute(encodeClass(D.attribute(j).name(), D.attribute(k).name()), new ArrayList(values)), L); // Set values for the new attribute for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); String y_jk = encodeValue(x.stringValue(j), x.stringValue(k)); try { x.setValue(L, y_jk); // y_jk = } catch (Exception e) { //x.setMissing(L); //D.delete(i); //i--; String y_close[] = getNeighbours(y_jk, count, 1); // A+B+NEG, A+C+NEG //System.out.println("OK, that value ("+y_jk+") didn't exist ... set the closests ones ...: "+Arrays.toString(y_close)); int max_c = 0; for (String y_ : y_close) { int c = count.get(y_); if (c > max_c) { max_c = c; y_max = y_; } } //System.out.println("we actually found "+Arrays.toString(y_close)+" but will only set one for now (the one with the highest count) : "+y_max+" ..."); x.setValue(L, y_max); // ok, that value didn't exist, set the maximum one (@TODO: set the nearest one) } } // Delete separate attributes D.deleteAttributeAt(k > j ? k : j); D.deleteAttributeAt(k > j ? j : k); // Set class index D.setClassIndex(L - 1); return D; }
From source file:milk.classifiers.MIRBFNetwork.java
License:Open Source License
public Exemplars transform(Exemplars ex) throws Exception { // Throw all the instances together Instances data = new Instances(ex.exemplar(0).getInstances()); for (int i = 0; i < ex.numExemplars(); i++) { Exemplar curr = ex.exemplar(i);/*from w ww . ja v a 2 s . c o m*/ double weight = 1.0 / (double) curr.getInstances().numInstances(); for (int j = 0; j < curr.getInstances().numInstances(); j++) { Instance inst = (Instance) curr.getInstances().instance(j).copy(); inst.setWeight(weight); data.add(inst); } } double factor = (double) data.numInstances() / (double) data.sumOfWeights(); for (int i = 0; i < data.numInstances(); i++) { data.instance(i).setWeight(data.instance(i).weight() * factor); } SimpleKMeans kMeans = new SimpleKMeans(); kMeans.setNumClusters(m_num_clusters); MakeDensityBasedClusterer clust = new MakeDensityBasedClusterer(); clust.setClusterer(kMeans); m_clm.setDensityBasedClusterer(clust); m_clm.setIgnoredAttributeIndices("" + (ex.exemplar(0).idIndex() + 1)); m_clm.setInputFormat(data); // Use filter and discard result Instances tempData = Filter.useFilter(data, m_clm); tempData = new Instances(tempData, 0); tempData.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0); // Go through exemplars and add them to new dataset Exemplars newExs = new Exemplars(tempData); for (int i = 0; i < ex.numExemplars(); i++) { Exemplar curr = ex.exemplar(i); Instances temp = Filter.useFilter(curr.getInstances(), m_clm); temp.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0); for (int j = 0; j < temp.numInstances(); j++) { temp.instance(j).setValue(0, curr.idValue()); } newExs.add(new Exemplar(temp)); } //System.err.println("Finished transforming"); //System.err.println(newExs); return newExs; }
From source file:milk.classifiers.MIRBFNetwork.java
License:Open Source License
public Exemplar transform(Exemplar test) throws Exception { Instances temp = Filter.useFilter(test.getInstances(), m_clm); temp.insertAttributeAt(test.getInstances().attribute(0), 0); for (int j = 0; j < temp.numInstances(); j++) { temp.instance(j).setValue(0, test.idValue()); //System.err.println(temp.instance(j)); }//from w ww . j a v a 2 s . c o m return new Exemplar(temp); }