List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:meka.core.MLUtils.java
License:Open Source License
public static final String toDebugString(Instances D) { int L = D.classIndex(); StringBuilder sb = new StringBuilder(); sb.append("D=" + D.numInstances()); sb.append(" L=" + L + " {"); for (int j = 0; j < L; j++) { sb.append(D.attribute(j).name() + " "); }//from www . j a v a2 s .co m sb.append("}"); return sb.toString(); }
From source file:meka.core.StatUtils.java
License:Open Source License
/** * jPMF - Joint PMF./*from ww w. j a va 2 s . c o m*/ * @return the joint PMF of the j-th and k-th labels in D. */ public static double[][] jPMF(Instances D, int j, int k) { double JOINT[][] = new double[D.attribute(j).numValues()][D.attribute(k).numValues()]; int N = D.numInstances(); for (int i = 0; i < N; i++) { int v_j = (int) Math.round(D.instance(i).value(j)); int v_k = (int) Math.round(D.instance(i).value(k)); JOINT[v_j][v_k] += (1.0 / (double) N); } return JOINT; }
From source file:meka.core.StatUtils.java
License:Open Source License
/** * Joint Distribution./*from w w w . jav a2 s . c o m*/ * @return the joint PMF of the j-th and k-th and lthlabels in D. */ public static double[][][] jPMF(Instances D, int j, int k, int l) { double JOINT[][][] = new double[D.attribute(j).numValues()][D.attribute(k).numValues()][D.attribute(l) .numValues()]; int N = D.numInstances(); for (int i = 0; i < N; i++) { int v_j = (int) Math.round(D.instance(i).value(j)); int v_k = (int) Math.round(D.instance(i).value(k)); int v_l = (int) Math.round(D.instance(i).value(l)); JOINT[v_j][v_k][v_l] += (1.0 / (double) N); } return JOINT; }
From source file:meka.core.StatUtils.java
License:Open Source License
/**
 * I - Mutual Information.
 * <br>
 * NOTE Multi-target friendly (does not assume binary labels).
 * <br>
 * NOTE a bit slow (every probability is obtained through a separate call to p / P on D).
 * <br>
 * Sums <code>p(x,y) * log( p(x,y) / (p(x) * p(y)) )</code> over all values x of attribute j
 * and all values y of attribute k. Terms whose joint probability is exactly zero are
 * skipped (convention 0 * log 0 = 0); without that, a single such term evaluates to NaN
 * and turns the whole result into NaN.
 *
 * @param D the dataset
 * @param j index of the first label
 * @param k index of the second label
 * @return I(Y_j;Y_k) in dataset D.
 */
public static double I(Instances D, int j, int k) {
    final int numX = D.attribute(j).numValues();
    final int numY = D.attribute(k).numValues();

    // The marginals of Y_k do not depend on x, so compute them once instead of once per x.
    // NOTE(review): relies on p(D, k, y) being side-effect free -- confirm.
    final double[] p_y = new double[numY];
    for (int y = 0; y < numY; y++) {
        p_y[y] = p(D, k, y);
    }

    double I = 0.0;
    for (int x = 0; x < numX; x++) {
        final double p_x = p(D, j, x);
        for (int y = 0; y < numY; y++) {
            final double p_xy = P(D, j, x, k, y);
            if (p_xy == 0.0) {
                // 0 * log(0) is NaN in floating point; the term contributes nothing
                continue;
            }
            I += p_xy * Math.log(p_xy / (p_x * p_y[y]));
        }
    }
    return I;
}
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/** * Get Partition From Dataset Hierarchy - assumes attributes are hierarchically arranged with '.'. * For example europe.spain indicates leafnode spain of branch europe. * @param D Dataset//from w w w . j a v a 2 s . c o m * @return partition */ public static final int[][] getPartitionFromDatasetHierarchy(Instances D) { HashMap<String, LabelSet> map = new HashMap<String, LabelSet>(); int L = D.classIndex(); for (int j = 0; j < L; j++) { String s = D.attribute(j).name().split("\\.")[0]; LabelSet Y = map.get(s); if (Y == null) Y = new LabelSet(new int[] { j }); else { Y.indices = A.append(Y.indices, j); Arrays.sort(Y.indices); } map.put(s, Y); } int partition[][] = new int[map.size()][]; int i = 0; for (LabelSet part : map.values()) { //System.out.println(""+i+": "+Arrays.toString(part.indices)); partition[i++] = part.indices; } return partition; }
From source file:meka.experiment.statisticsexporters.WekaFilter.java
License:Open Source License
/** * Turns the statistics into Instances./* w ww. j ava 2s .co m*/ * * @param stats the statistics to convert * @return the generated data */ protected Instances toInstances(List<EvaluationStatistics> stats) { Instances result; ArrayList<Attribute> atts; List<String> headers; Instance inst; double[] values; int i; // header headers = EvaluationStatisticsUtils.headers(stats, true, true); atts = new ArrayList<>(); for (String header : headers) { if (header.equals(EvaluationStatistics.KEY_CLASSIFIER) || header.equals(EvaluationStatistics.KEY_RELATION)) atts.add(new Attribute(header, (List) null)); else atts.add(new Attribute(header)); } result = new Instances("stats", atts, stats.size()); // data for (EvaluationStatistics stat : stats) { values = new double[result.numAttributes()]; for (i = 0; i < values.length; i++) { if (headers.get(i).equals(EvaluationStatistics.KEY_CLASSIFIER)) values[i] = result.attribute(i).addStringValue(stat.getCommandLine()); else if (headers.get(i).equals(EvaluationStatistics.KEY_RELATION)) values[i] = result.attribute(i).addStringValue(stat.getRelation()); else if (stat.containsKey(headers.get(i))) values[i] = stat.get(headers.get(i)).doubleValue(); else values[i] = Utils.missingValue(); } inst = new DenseInstance(1.0, values); result.add(inst); } return result; }
From source file:meka.experiment.statisticsexporters.WekaFilter.java
License:Open Source License
/**
 * Converts the Instances back into statistics.
 * <br>
 * Requires the classifier and relation attributes to be present; if either is missing,
 * the problem is logged and an empty list is returned. For every instance the classifier
 * is rebuilt from its command-line string, and each numeric, non-missing attribute is
 * stored in the statistics under the attribute's name. An instance that fails to convert
 * is reported through handleException and skipped.
 *
 * @param data the data to convert
 * @return the generated statistics
 */
protected List<EvaluationStatistics> fromInstances(Instances data) {
    List<EvaluationStatistics> result = new ArrayList<>();

    Attribute classifierAtt = data.attribute(EvaluationStatistics.KEY_CLASSIFIER);
    if (classifierAtt == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_CLASSIFIER);
        return result;
    }
    Attribute relationAtt = data.attribute(EvaluationStatistics.KEY_RELATION);
    if (relationAtt == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_RELATION);
        return result;
    }

    final int numRows = data.numInstances();
    for (int row = 0; row < numRows; row++) {
        Instance inst = data.instance(row);
        try {
            MultiLabelClassifier cls = OptionUtils.fromCommandLine(MultiLabelClassifier.class,
                    inst.stringValue(classifierAtt));
            String rel = inst.stringValue(relationAtt);
            EvaluationStatistics stat = new EvaluationStatistics(cls, rel, null);
            for (int col = 0; col < inst.numAttributes(); col++) {
                if (!inst.attribute(col).isNumeric() || inst.isMissing(col)) {
                    continue;
                }
                stat.put(inst.attribute(col).name(), inst.value(col));
            }
            result.add(stat);
        } catch (Exception e) {
            handleException("Failed to process instance: " + inst, e);
        }
    }

    return result;
}
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/** * Merge Labels./* ww w . j ava2s . c o m*/ * * @param j index 1 (assume that <code>j < k</code>) * @param k index 2 (assume that <code>j < k</code>) * @param D iInstances, with attributes in labeled by original index * @return Instaces with attributes at j and k moved to position L as (j,k), with classIndex = L-1 */ public static Instances mergeLabels(Instances D, int j, int k, int p) { int L = D.classIndex(); HashMap<String, Integer> count = new HashMap<String, Integer>(); Set<String> values = new HashSet<String>(); for (int i = 0; i < D.numInstances(); i++) { String v = encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k)); String w = "" + (int) D.instance(i).value(j) + (int) D.instance(i).value(k); //System.out.println("w = "+w); count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1); values.add(encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k))); } //System.out.println("("+j+","+k+")"+values); System.out.print("pruned from " + count.size() + " to "); MLUtils.pruneCountHashMap(count, p); String y_max = (String) MLUtils.argmax(count); // @todo won't need this in the future System.out.println("" + count.size() + " with p = " + p); System.out.println("" + count); values = count.keySet(); // Create and insert the new attribute D.insertAttributeAt( new Attribute(encodeClass(D.attribute(j).name(), D.attribute(k).name()), new ArrayList(values)), L); // Set values for the new attribute for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); String y_jk = encodeValue(x.stringValue(j), x.stringValue(k)); try { x.setValue(L, y_jk); // y_jk = } catch (Exception e) { //x.setMissing(L); //D.delete(i); //i--; String y_close[] = getNeighbours(y_jk, count, 1); // A+B+NEG, A+C+NEG //System.out.println("OK, that value ("+y_jk+") didn't exist ... 
set the closests ones ...: "+Arrays.toString(y_close)); int max_c = 0; for (String y_ : y_close) { int c = count.get(y_); if (c > max_c) { max_c = c; y_max = y_; } } //System.out.println("we actually found "+Arrays.toString(y_close)+" but will only set one for now (the one with the highest count) : "+y_max+" ..."); x.setValue(L, y_max); // ok, that value didn't exist, set the maximum one (@TODO: set the nearest one) } } // Delete separate attributes D.deleteAttributeAt(k > j ? k : j); D.deleteAttributeAt(k > j ? j : k); // Set class index D.setClassIndex(L - 1); return D; }
From source file:meka.gui.explorer.classify.AbstractShowThresholdCurve.java
License:Open Source License
/** * Sets the combobox indices./*from www .ja va 2 s.c om*/ * * @param data the threshold curve data * @param panel the panel * @throws Exception if setting of indices fails */ protected void setComboBoxIndices(Instances data, ThresholdVisualizePanel panel) throws Exception { if (data.attribute(getDefaultXColumn()) != null) panel.setXIndex(data.attribute(getDefaultXColumn()).index()); if (data.attribute(getDefaultYColumn()) != null) panel.setYIndex(data.attribute(getDefaultYColumn()).index()); }
From source file:meka.gui.explorer.classify.IncrementalPerformance.java
License:Open Source License
/**
 * Creates a panel displaying the data.
 * <br>
 * The data is added as a single plot named "Incremental performance" in which every
 * point except the first is flagged as connected. If the data contains the SAMPLES
 * and ACCURACY attributes, they are selected as X and Y axis respectively.
 *
 * @param data the plot data
 * @return the panel
 * @throws Exception if plot generation fails
 */
protected VisualizePanel createPanel(Instances data) throws Exception {
    VisualizePanel panel = new ThresholdVisualizePanel();

    PlotData2D plot = new PlotData2D(data);
    plot.setPlotName("Incremental performance");
    plot.m_displayAllPoints = true;

    // index 0 stays false, every later point is flagged true
    boolean[] connect = new boolean[data.numInstances()];
    int idx = 1;
    while (idx < connect.length) {
        connect[idx] = true;
        idx++;
    }
    plot.setConnectPoints(connect);
    panel.addPlot(plot);

    boolean hasSamples = data.attribute(SAMPLES) != null;
    if (hasSamples) {
        panel.setXIndex(data.attribute(SAMPLES).index());
    }
    boolean hasAccuracy = data.attribute(ACCURACY) != null;
    if (hasAccuracy) {
        panel.setYIndex(data.attribute(ACCURACY).index());
    }

    return panel;
}