Example usage for weka.core Instances attribute

Introduction

On this page you can find example usages of the attribute method of weka.core.Instances.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:meka.core.MLUtils.java

License:Open Source License

/**
 * Builds a short debug description of a dataset: the number of instances,
 * the class index, and the names of all attributes located before the class index.
 *
 * @param D the dataset to describe
 * @return a string of the form <code>D=&lt;numInstances&gt; L=&lt;classIndex&gt; {name1 name2 ... }</code>
 */
public static final String toDebugString(Instances D) {
    final int numLabels = D.classIndex();
    StringBuilder text = new StringBuilder();
    text.append("D=").append(D.numInstances());
    text.append(" L=").append(numLabels).append(" {");
    // each name is followed by a single space, including the last one
    for (int idx = 0; idx < numLabels; idx++) {
        text.append(D.attribute(idx).name()).append(" ");
    }
    text.append("}");
    return text.toString();
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * jPMF - Joint PMF./*from ww w. j a va  2 s  . c o  m*/
 * @return the joint PMF of the j-th and k-th labels in D.
 */
public static double[][] jPMF(Instances D, int j, int k) {
    double JOINT[][] = new double[D.attribute(j).numValues()][D.attribute(k).numValues()];
    int N = D.numInstances();
    for (int i = 0; i < N; i++) {
        int v_j = (int) Math.round(D.instance(i).value(j));
        int v_k = (int) Math.round(D.instance(i).value(k));
        JOINT[v_j][v_k] += (1.0 / (double) N);
    }
    return JOINT;
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * Joint Distribution./*from  w w w . jav a2 s . c  o  m*/
 * @return the joint PMF of the j-th and k-th and lthlabels in D.
 */
public static double[][][] jPMF(Instances D, int j, int k, int l) {
    double JOINT[][][] = new double[D.attribute(j).numValues()][D.attribute(k).numValues()][D.attribute(l)
            .numValues()];
    int N = D.numInstances();
    for (int i = 0; i < N; i++) {
        int v_j = (int) Math.round(D.instance(i).value(j));
        int v_k = (int) Math.round(D.instance(i).value(k));
        int v_l = (int) Math.round(D.instance(i).value(l));
        JOINT[v_j][v_k][v_l] += (1.0 / (double) N);
    }
    return JOINT;
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * I - Mutual Information.
 * <br>
 * NOTE Multi-target friendly (does not assume binary labels).
 * <br>
 * NOTE a bit slow: every marginal and joint probability is obtained through a
 * separate call to the p/P helpers.
 * <br>
 * NOTE value pairs whose joint probability is exactly zero contribute nothing to
 * the sum (the usual 0 * log 0 = 0 convention); without this, a single such pair
 * would turn the whole result into NaN.
 *
 * @param D the dataset
 * @param j index of the first attribute
 * @param k index of the second attribute
 * @return I(Y_j;Y_k) in dataset D.
 */
public static double I(Instances D, int j, int k) {
    final int numX = D.attribute(j).numValues();
    final int numY = D.attribute(k).numValues();
    double I = 0.0;
    for (int x = 0; x < numX; x++) {
        double p_x = p(D, j, x);
        for (int y = 0; y < numY; y++) {
            double p_y = p(D, k, y);
            double p_xy = P(D, j, x, k, y);
            if (p_xy == 0.0) {
                // 0 * Math.log(0) is 0 * -Infinity = NaN in floating point; the term is 0 by convention.
                // NOTE(review): if the p/P helpers already smooth away zeros this branch never fires.
                continue;
            }
            I += p_xy * Math.log(p_xy / (p_x * p_y));
        }
    }
    return I;
}

From source file:meka.core.SuperLabelUtils.java

License:Open Source License

/**
 * Get Partition From Dataset Hierarchy - assumes attributes are hierarchically arranged with '.'.
 * For example europe.spain indicates leafnode spain of branch europe.
 * All attributes before the class index whose names share the same text before
 * the first '.' end up in the same part.
 *
 * @param   D      Dataset
 * @return   partition: one array of attribute indices per distinct branch name
 *           (parts appear in the iteration order of the underlying HashMap)
 */
public static final int[][] getPartitionFromDatasetHierarchy(Instances D) {
    HashMap<String, LabelSet> groups = new HashMap<String, LabelSet>();
    final int numLabels = D.classIndex();
    for (int label = 0; label < numLabels; label++) {
        // the branch is whatever precedes the first '.' in the attribute name
        String branch = D.attribute(label).name().split("\\.")[0];
        LabelSet members = groups.get(branch);
        if (members != null) {
            // known branch: extend its index list in place and keep it sorted
            members.indices = A.append(members.indices, label);
            Arrays.sort(members.indices);
        } else {
            // first attribute seen for this branch
            groups.put(branch, new LabelSet(new int[] { label }));
        }
    }
    int[][] partition = new int[groups.size()][];
    int slot = 0;
    for (LabelSet members : groups.values()) {
        partition[slot] = members.indices;
        slot++;
    }
    return partition;
}

From source file:meka.experiment.statisticsexporters.WekaFilter.java

License:Open Source License

/**
 * Turns the statistics into Instances./*  w ww. j ava  2s  .co  m*/
 *
 * @param stats         the statistics to convert
 * @return              the generated data
 */
protected Instances toInstances(List<EvaluationStatistics> stats) {
    Instances result;
    ArrayList<Attribute> atts;
    List<String> headers;
    Instance inst;
    double[] values;
    int i;

    // header
    headers = EvaluationStatisticsUtils.headers(stats, true, true);
    atts = new ArrayList<>();
    for (String header : headers) {
        if (header.equals(EvaluationStatistics.KEY_CLASSIFIER)
                || header.equals(EvaluationStatistics.KEY_RELATION))
            atts.add(new Attribute(header, (List) null));
        else
            atts.add(new Attribute(header));
    }
    result = new Instances("stats", atts, stats.size());

    // data
    for (EvaluationStatistics stat : stats) {
        values = new double[result.numAttributes()];
        for (i = 0; i < values.length; i++) {
            if (headers.get(i).equals(EvaluationStatistics.KEY_CLASSIFIER))
                values[i] = result.attribute(i).addStringValue(stat.getCommandLine());
            else if (headers.get(i).equals(EvaluationStatistics.KEY_RELATION))
                values[i] = result.attribute(i).addStringValue(stat.getRelation());
            else if (stat.containsKey(headers.get(i)))
                values[i] = stat.get(headers.get(i)).doubleValue();
            else
                values[i] = Utils.missingValue();
        }
        inst = new DenseInstance(1.0, values);
        result.add(inst);
    }

    return result;
}

From source file:meka.experiment.statisticsexporters.WekaFilter.java

License:Open Source License

/**
 * Converts the Instances back into statistics.
 * If the classifier or the relation attribute is absent, this is logged and an
 * empty list is returned. A row that fails to convert is reported through
 * handleException and skipped.
 *
 * @param data          the data to convert
 * @return              the generated statistics
 */
protected List<EvaluationStatistics> fromInstances(Instances data) {
    List<EvaluationStatistics> result = new ArrayList<>();

    // both key attributes must be present
    if (data.attribute(EvaluationStatistics.KEY_CLASSIFIER) == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_CLASSIFIER);
        return result;
    }
    if (data.attribute(EvaluationStatistics.KEY_RELATION) == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_RELATION);
        return result;
    }

    for (int row = 0; row < data.numInstances(); row++) {
        // declared outside the try so the catch block can report the offending row
        Instance inst = data.instance(row);
        try {
            MultiLabelClassifier cls = OptionUtils.fromCommandLine(MultiLabelClassifier.class,
                    inst.stringValue(data.attribute(EvaluationStatistics.KEY_CLASSIFIER)));
            String rel = inst.stringValue(data.attribute(EvaluationStatistics.KEY_RELATION));
            EvaluationStatistics stat = new EvaluationStatistics(cls, rel, null);
            // copy over every numeric value that is actually present
            for (int col = 0; col < inst.numAttributes(); col++) {
                if (!inst.attribute(col).isNumeric() || inst.isMissing(col)) {
                    continue;
                }
                stat.put(inst.attribute(col).name(), inst.value(col));
            }
            result.add(stat);
        } catch (Exception e) {
            handleException("Failed to process instance: " + inst, e);
        }
    }

    return result;
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Merge Labels./* ww  w  . j  ava2s  . c  o m*/
 *
 * @param   j    index 1 (assume that <code>j &lt; k</code>)
 * @param   k   index 2 (assume that <code>j &lt; k</code>)
 * @param   D   iInstances, with attributes in labeled by original index
 * @return       Instaces with attributes at j and k moved to position L as (j,k), with classIndex = L-1
 */
public static Instances mergeLabels(Instances D, int j, int k, int p) {
    int L = D.classIndex();

    HashMap<String, Integer> count = new HashMap<String, Integer>();

    Set<String> values = new HashSet<String>();
    for (int i = 0; i < D.numInstances(); i++) {
        String v = encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k));
        String w = "" + (int) D.instance(i).value(j) + (int) D.instance(i).value(k);
        //System.out.println("w = "+w);
        count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1);
        values.add(encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k)));
    }
    //System.out.println("("+j+","+k+")"+values);
    System.out.print("pruned from " + count.size() + " to ");
    MLUtils.pruneCountHashMap(count, p);
    String y_max = (String) MLUtils.argmax(count); // @todo won't need this in the future
    System.out.println("" + count.size() + " with p = " + p);
    System.out.println("" + count);
    values = count.keySet();

    // Create and insert the new attribute
    D.insertAttributeAt(
            new Attribute(encodeClass(D.attribute(j).name(), D.attribute(k).name()), new ArrayList(values)), L);

    // Set values for the new attribute
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        String y_jk = encodeValue(x.stringValue(j), x.stringValue(k));
        try {
            x.setValue(L, y_jk); // y_jk = 
        } catch (Exception e) {
            //x.setMissing(L);
            //D.delete(i);
            //i--;
            String y_close[] = getNeighbours(y_jk, count, 1); // A+B+NEG, A+C+NEG
            //System.out.println("OK, that value ("+y_jk+") didn't exist ... set the closests ones ...: "+Arrays.toString(y_close));
            int max_c = 0;
            for (String y_ : y_close) {
                int c = count.get(y_);
                if (c > max_c) {
                    max_c = c;
                    y_max = y_;
                }
            }
            //System.out.println("we actually found "+Arrays.toString(y_close)+" but will only set one for now (the one with the highest count) : "+y_max+" ...");
            x.setValue(L, y_max);
            // ok, that value didn't exist, set the maximum one (@TODO: set the nearest one)
        }
    }

    // Delete separate attributes
    D.deleteAttributeAt(k > j ? k : j);
    D.deleteAttributeAt(k > j ? j : k);

    // Set class index
    D.setClassIndex(L - 1);
    return D;
}

From source file:meka.gui.explorer.classify.AbstractShowThresholdCurve.java

License:Open Source License

/**
 * Sets the combobox indices./*from   www  .ja  va 2 s.c  om*/
 *
 * @param data          the threshold curve data
 * @param panel         the panel
 * @throws Exception    if setting of indices fails
 */
protected void setComboBoxIndices(Instances data, ThresholdVisualizePanel panel) throws Exception {
    if (data.attribute(getDefaultXColumn()) != null)
        panel.setXIndex(data.attribute(getDefaultXColumn()).index());
    if (data.attribute(getDefaultYColumn()) != null)
        panel.setYIndex(data.attribute(getDefaultYColumn()).index());
}

From source file:meka.gui.explorer.classify.IncrementalPerformance.java

License:Open Source License

/**
 * Creates a panel displaying the data.
 * The data is added as a single plot named "Incremental performance". The
 * SAMPLES and ACCURACY attributes, when present, are selected as the X and Y
 * axes.
 *
 * @param data          the plot data
 * @return              the panel
 * @throws Exception    if plot generation fails
 */
protected VisualizePanel createPanel(Instances data) throws Exception {
    VisualizePanel result = new ThresholdVisualizePanel();

    PlotData2D plot = new PlotData2D(data);
    plot.setPlotName("Incremental performance");
    plot.m_displayAllPoints = true;

    // every entry except index 0 is set to true
    boolean[] connectPoints = new boolean[data.numInstances()];
    int cp = 1;
    while (cp < connectPoints.length) {
        connectPoints[cp] = true;
        cp++;
    }
    plot.setConnectPoints(connectPoints);
    result.addPlot(plot);

    // axis selection, only for attributes that exist in the data
    if (data.attribute(SAMPLES) != null) {
        result.setXIndex(data.attribute(SAMPLES).index());
    }
    if (data.attribute(ACCURACY) != null) {
        result.setYIndex(data.attribute(ACCURACY).index());
    }
    return result;
}