Example usage for weka.core Instances setClassIndex

List of usage examples for weka.core Instances setClassIndex

Introduction

On this page you can find example usages of weka.core.Instances.setClassIndex.

Prototype

public void setClassIndex(int classIndex) 

Source Link

Document

Sets the class index of the set.

Usage

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * Transform instances into a multi-class representation.
 * @param D         original dataset/*from  ww w . j  a  v a 2 s .  c om*/
 * @param L         number of labels in the original dataset
 * @param cname      class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods)
 * @param p         pruning value
 * @param n         restoration value
 * @return transformed dataset
 */
public static Instances PSTransformation(Instances D, int L, String cname, int p, int n) {
    D = new Instances(D);

    // Gather combinations
    HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L);

    // Prune combinations
    if (p > 0)
        MLUtils.pruneCountHashMap(distinctCombinations, p);

    // Check there are > 2
    if (distinctCombinations.size() <= 1 && p > 0) {
        // ... or try again if not ...
        System.err.println("[Warning] You did too much pruning, setting P = P-1");
        return PSTransformation(D, L, cname, p - 1, n);
    }

    // Create class attribute
    ArrayList<String> ClassValues = new ArrayList<String>();
    for (LabelSet y : distinctCombinations.keySet())
        ClassValues.add(y.toString());
    Attribute C = new Attribute(cname, ClassValues);

    // Insert new special attribute (which has all possible combinations of labels) 
    D.insertAttributeAt(C, L);
    D.setClassIndex(L);

    //Add class values
    int N = D.numInstances();
    for (int i = 0; i < N; i++) {
        Instance x = D.instance(i);
        LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L));
        String y_string = y.toString();

        // add it
        if (ClassValues.contains(y_string)) //if its class value exists
            x.setClassValue(y_string);
        // decomp
        else if (n > 0) {
            //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n);
            LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n);
            //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations);
            if (d_subsets.length > 0) {
                // fast
                x.setClassValue(d_subsets[0].toString());
                // additional
                if (d_subsets.length > 1) {
                    for (int s_i = 1; s_i < d_subsets.length; s_i++) {
                        Instance x_ = (Instance) (x).copy();
                        x_.setClassValue(d_subsets[s_i].toString());
                        D.add(x_);
                    }
                }
            } else {
                x.setClassMissing();
            }
        }
    }

    // remove with missing class
    D.deleteWithMissingClass();

    try {
        D = F.removeLabels(D, L);
    } catch (Exception e) {
        // should never happen
    }
    D.setClassIndex(0);

    return D;
}

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * Transform instances into a multi-class representation.
 * <p>
 * This method performs exactly the same steps as
 * {@link #PSTransformation(Instances, int, String, int, int)} (to which it already handed over
 * whenever the pruning value had to be reduced), so it delegates to that method instead of
 * keeping a second copy of the implementation.
 *
 * @param D         original dataset
 * @param L         number of labels in that dataset
 * @param cname     class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods)
 * @param p         pruning value
 * @param n         restoration value
 * @return transformed dataset
 */
public static Instances SLTransformation(Instances D, int L, String cname, int p, int n) {
    return PSTransformation(D, L, cname, p, n);
}

From source file:meka.core.SuperLabelUtils.java

License:Open Source License

/**
 * Make Partition Dataset - out of dataset D, on indices part[].
 * <p>
 * The class index of <code>D</code> is taken as the number of labels L. The labels listed in
 * <code>part</code> are kept via {@code F.keepLabels}, the class index of the result is set to
 * <code>part.length</code>, and the result is passed through
 * {@code PSUtils.PSTransformation}.
 *
 * @param   D      regular multi-label dataset (of L = classIndex() labels)
 * @param   part   list of indices we want to make into a PS dataset.
 * @param   P      see {@link PSUtils}
 * @param   N      see {@link PSUtils}
 * @return Dataset with 1 multi-valued class label, representing the combinations of part[].
 * @throws Exception if thrown while filtering the labels
 */
public static Instances makePartitionDataset(Instances D, int part[], int P, int N) throws Exception {
    int L = D.classIndex();
    // strip out irrelevant attributes; keepLabels is given D itself and its result is what
    // gets used, so no separate copy of D is made beforehand
    Instances D_ = F.keepLabels(D, L, part);
    D_.setClassIndex(part.length);
    // make LC transformation
    return PSUtils.PSTransformation(D_, P, N);
}

From source file:meka.core.SuperLabelUtils.java

License:Open Source License

/**
 * Super Label Transformation - transform dataset D into a dataset with <code>k</code> multi-class target attributes.
 * Use the NSR/PS-style pruning and recomposition, according to partition 'indices', and pruning values 'p' and 'n'.
 * @see PSUtils.PSTransformation/*  www  .j a  v  a2 s.  c o  m*/
 * @param indices   m by k: m super variables, each relating to k original variables
 * @param    D   either multi-label or multi-target dataset
 * @param    p   pruning value
 * @param    n   subset relpacement value
 * @return       a multi-target dataset
 */
public static Instances SLTransformation(Instances D, int indices[][], int p, int n) {

    int L = D.classIndex();
    int K = indices.length;
    ArrayList<String> values[] = new ArrayList[K];
    HashMap<String, Integer> counts[] = new HashMap[K];

    // create D_
    Instances D_ = new Instances(D);

    // clear D_
    // F.removeLabels(D_,L);
    for (int j = 0; j < L; j++) {
        D_.deleteAttributeAt(0);
    }

    // create atts
    for (int j = 0; j < K; j++) {
        int att[] = indices[j];
        //int values[] = new int[2]; //getValues(indices,D,p);
        counts[j] = getCounts(D, att, p);
        Set<String> vals = counts[j].keySet(); //getValues(D,att,p);
        values[j] = new ArrayList(vals);
        D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j);
    }

    // copy over values
    ArrayList<Integer> deleteList = new ArrayList<Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        for (int j = 0; j < K; j++) {
            String y = encodeValue(x, indices[j]);
            try {
                D_.instance(i).setValue(j, y); // y =
            } catch (Exception e) {
                // value not allowed
                deleteList.add(i); // mark it for deletion
                String y_close[] = getTopNSubsets(y, counts[j], n); // get N subsets
                for (int m = 0; m < y_close.length; m++) {
                    //System.out.println("add "+y_close[m]+" "+counts[j]);
                    Instance x_copy = (Instance) D_.instance(i).copy();
                    x_copy.setValue(j, y_close[m]);
                    x_copy.setWeight(1.0 / y_close.length);
                    D_.add(x_copy);
                }
            }
        }
    }
    // clean up
    Collections.sort(deleteList, Collections.reverseOrder());
    //System.out.println("Deleting "+deleteList.size()+" defunct instances.");
    for (int i : deleteList) {
        D_.delete(i);
    }
    // set class
    D_.setClassIndex(K);
    // done!
    return D_;
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Merge Labels - Make a new 'D', with labels made into superlabels, according to partition 'indices', and pruning values 'p' and 'n'.
 * <p>
 * Works on a copy of <code>D</code>: the first L = classIndex() attributes are deleted and one
 * nominal attribute per row of <code>indices</code> is inserted in their place. An instance
 * whose encoded value is rejected by one of the new attributes is removed and replaced by
 * weighted copies carrying the values returned by <code>NSR.getTopNSubsets</code>.
 *
 * @param    D         assume attributes in D labeled by original index
 * @param    indices   one row per super label, listing the original label indices it merges
 * @param    p         pruning value (passed to <code>getCounts</code>)
 * @param    n         number of subsets requested from <code>NSR.getTopNSubsets</code>
 * @return       new Instances with one attribute per row of <code>indices</code> in place of
 *               the original labels, with classIndex = <code>indices.length</code>
 */
public static Instances mergeLabels(Instances D, int indices[][], int p, int n) {

    int L = D.classIndex();
    int K = indices.length;
    @SuppressWarnings("unchecked") // arrays of a parameterized type can only be created raw
    HashMap<String, Integer> counts[] = new HashMap[K];

    // create D_
    Instances D_ = new Instances(D);

    // clear D_: drop the L original label attributes
    for (int j = 0; j < L; j++) {
        D_.deleteAttributeAt(0);
    }

    // create atts: one nominal attribute per super label, valued by the counted combinations
    for (int j = 0; j < K; j++) {
        int att[] = indices[j];
        counts[j] = getCounts(D, att, p);
        Set<String> vals = counts[j].keySet();
        D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList<String>(vals)), j);
    }

    // copy over values
    ArrayList<Integer> deleteList = new ArrayList<Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        // An instance may be rejected by several super labels, but must be listed for
        // deletion only once: deleting the same index twice would remove a different row.
        boolean markedForDeletion = false;
        for (int j = 0; j < K; j++) {
            String y = encodeValue(x, indices[j]);
            try {
                D_.instance(i).setValue(j, y);
            } catch (Exception e) {
                // value not allowed
                if (!markedForDeletion) {
                    deleteList.add(i); // mark it for deletion
                    markedForDeletion = true;
                }
                String y_close[] = NSR.getTopNSubsets(y, counts[j], n); // get N subsets
                for (int m = 0; m < y_close.length; m++) {
                    Instance x_copy = (Instance) D_.instance(i).copy();
                    x_copy.setValue(j, y_close[m]);
                    // the copies share the weight equally
                    x_copy.setWeight(1.0 / y_close.length);
                    D_.add(x_copy);
                }
            }
        }
    }
    // clean up: delete from the highest index down so the remaining indices stay valid
    Collections.sort(deleteList, Collections.reverseOrder());
    for (int i : deleteList) {
        D_.delete(i);
    }
    // set class
    D_.setClassIndex(K);
    // done!
    return D_;
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Merge Labels./*  w w  w .java  2 s . c  om*/
 *
 * @param   j    index 1 (assume that <code>j &lt; k</code>)
 * @param   k   index 2 (assume that <code>j &lt; k</code>)
 * @param   D   iInstances, with attributes in labeled by original index
 * @return       Instaces with attributes at j and k moved to position L as (j,k), with classIndex = L-1
 */
public static Instances mergeLabels(Instances D, int j, int k, int p) {
    int L = D.classIndex();

    HashMap<String, Integer> count = new HashMap<String, Integer>();

    Set<String> values = new HashSet<String>();
    for (int i = 0; i < D.numInstances(); i++) {
        String v = encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k));
        String w = "" + (int) D.instance(i).value(j) + (int) D.instance(i).value(k);
        //System.out.println("w = "+w);
        count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1);
        values.add(encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k)));
    }
    //System.out.println("("+j+","+k+")"+values);
    System.out.print("pruned from " + count.size() + " to ");
    MLUtils.pruneCountHashMap(count, p);
    String y_max = (String) MLUtils.argmax(count); // @todo won't need this in the future
    System.out.println("" + count.size() + " with p = " + p);
    System.out.println("" + count);
    values = count.keySet();

    // Create and insert the new attribute
    D.insertAttributeAt(
            new Attribute(encodeClass(D.attribute(j).name(), D.attribute(k).name()), new ArrayList(values)), L);

    // Set values for the new attribute
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        String y_jk = encodeValue(x.stringValue(j), x.stringValue(k));
        try {
            x.setValue(L, y_jk); // y_jk = 
        } catch (Exception e) {
            //x.setMissing(L);
            //D.delete(i);
            //i--;
            String y_close[] = getNeighbours(y_jk, count, 1); // A+B+NEG, A+C+NEG
            //System.out.println("OK, that value ("+y_jk+") didn't exist ... set the closests ones ...: "+Arrays.toString(y_close));
            int max_c = 0;
            for (String y_ : y_close) {
                int c = count.get(y_);
                if (c > max_c) {
                    max_c = c;
                    y_max = y_;
                }
            }
            //System.out.println("we actually found "+Arrays.toString(y_close)+" but will only set one for now (the one with the highest count) : "+y_max+" ...");
            x.setValue(L, y_max);
            // ok, that value didn't exist, set the maximum one (@TODO: set the nearest one)
        }
    }

    // Delete separate attributes
    D.deleteAttributeAt(k > j ? k : j);
    D.deleteAttributeAt(k > j ? j : k);

    // Set class index
    D.setClassIndex(L - 1);
    return D;
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Command-line entry point: loads the dataset given by option <code>-i</code>, sets its class
 * index to the value of option <code>-c</code>, runs the filter on it and prints the result to
 * <code>System.out</code>. Any failure is reported on <code>System.err</code>.
 *
 * @param argv   command-line options (<code>-i &lt;file&gt;</code>, <code>-c &lt;class index&gt;</code>)
 */
public static void main(String[] argv) {
    try {
        String fname = Utils.getOption('i', argv);
        Instances D;
        BufferedReader reader = new BufferedReader(new FileReader(fname));
        try {
            D = new Instances(reader);
        } finally {
            // release the file handle whether or not loading succeeded
            reader.close();
        }
        SuperNodeFilter f = new SuperNodeFilter();
        int c = Integer.parseInt(Utils.getOption('c', argv));
        D.setClassIndex(c);
        System.out.println("" + f.process(D));
        //runFilter(new SuperNodeFilter(), argv);
    } catch (Exception e) {
        System.err.println("");
        e.printStackTrace();
        //System.exit(1);
    }
}

From source file:meka.filters.unsupervised.attribute.MekaClassAttributes.java

License:Open Source License

/**
 * Determines the output format based on the input format and returns
 * this. In case the output format cannot be returned immediately, i.e.,
 * hasImmediateOutputFormat() returns false, then this method will be called
 * from batchFinished() after the call of preprocess(Instances), in which,
 * e.g., statistics for the actual processing step can be gathered.
 * <p>
 * The selected attributes are moved to the front (in selection order), followed by all
 * remaining attributes; the class index and the relation name ("-C n") of the result both
 * reflect the number of selected attributes.
 *
 * @param inputFormat     the input format to base the output format on
 * @return                the output format
 * @throws Exception      in case the determination goes wrong
 */
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    final int numAttributes = inputFormat.numAttributes();

    m_AttributeIndices.setUpper(numAttributes - 1);
    final int[] selected = m_AttributeIndices.getSelection();
    if (selected.length == 0) {
        throw new WekaException("No attributes defined as class attributes!");
    }

    // Build the 1-based, comma-separated reorder specification: selected attributes first ...
    final StringBuilder spec = new StringBuilder();
    for (int index : selected) {
        if (spec.length() > 0) {
            spec.append(',');
        }
        spec.append(index + 1);
    }
    // ... then every attribute that was not selected, in its original order
    for (int att = 0; att < numAttributes; att++) {
        if (!m_AttributeIndices.isInRange(att)) {
            spec.append(',').append(att + 1);
        }
    }

    m_Reorder.setAttributeIndices(spec.toString());
    m_Reorder.setInputFormat(inputFormat);

    final Instances result = m_Reorder.getOutputFormat();
    result.setClassIndex(selected.length);
    result.setRelationName("-C " + selected.length);
    return result;
}

From source file:meka.gui.dataviewer.DataViewerMainPanel.java

License:Open Source License

/**
 * Displays some properties of the instances of the current panel in a list dialog.
 * Does nothing if there is no current panel or it holds no instances. If the instances
 * have no class index set, the last attribute is made the class attribute first.
 */
public void showProperties() {
    final DataPanel current = getCurrentPanel();
    if (current == null) {
        return;
    }

    final Instances data = current.getInstances();
    if (data == null) {
        return;
    }

    // the class-related entries below need a class attribute; default to the last one
    if (data.classIndex() < 0) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    // collect the entries to display
    final Vector<String> entries = new Vector<String>();
    entries.add("Filename: " + current.getFilename());
    entries.add("Relation name: " + data.relationName());
    entries.add("# of instances: " + data.numInstances());
    entries.add("# of attributes: " + data.numAttributes());
    entries.add("Class attribute: " + data.classAttribute().name());
    entries.add("# of class labels: " + data.numClasses());

    final ListSelectorDialog selector = new ListSelectorDialog(getParentFrame(), new JList(entries));
    selector.showDialog();
}

From source file:meka.gui.explorer.classify.EditTestData.java

License:Open Source License

/**
 * Returns the action listener to use in the menu.
 * <p>
 * The listener opens a viewer dialog on a copy of the owner's test data and, if the dialog
 * is approved, stores the instances obtained from the dialog as the owner's new test data.
 *
 * @param owner     the tab whose test data is edited
 * @return          the listener
 */
public ActionListener getActionListener(final ClassifyTab owner) {
    return new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            final Instances editable = new Instances(owner.getTestData());

            final ViewerDialog viewer = new ViewerDialog(null);
            viewer.setSize(800, 600);
            viewer.setLocationRelativeTo(owner);

            final int outcome = viewer.showDialog(editable);
            if (outcome != ViewerDialog.APPROVE_OPTION) {
                return;
            }

            final Instances edited = viewer.getInstances();
            // if class was not set before, reset it again after use of filter
            final boolean classWasUnset = owner.getTestData().classIndex() < 0;
            if (classWasUnset) {
                edited.setClassIndex(-1);
            }
            owner.setTestData(edited);
        }
    };
}