Example usage for weka.core Instances classIndex

Introduction

In this page you can find the example usage for weka.core Instances classIndex.

Prototype


publicint classIndex()

Source Link

Document

Returns the class attribute's index.

Usage

From source file:decisiontree.MyID3.java

private void makeTree(Instances data) {
    // Check if no instances have reached this node.  
    if (data.numInstances() == 0) {
        splitAttr = null;/*from  w w w . j  a  va 2 s.  c o  m*/
        leafValue = Double.NaN;
        leafDist = new double[data.numClasses()];
        return;
    }

    if (data.numDistinctValues(data.classIndex()) == 1) {
        leafValue = data.firstInstance().classValue();
        return;
    }

    // Compute attribute with maximum information gain.  
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    splitAttr = data.attribute(maxIndex(infoGains));

    // Make leaf if information gain is zero.   
    // Otherwise create successors.  
    if (Utils.eq(infoGains[splitAttr.index()], 0)) {
        splitAttr = null;
        leafDist = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            leafDist[(int) inst.classValue()]++;
        }
        normalize(leafDist);
        leafValue = Utils.maxIndex(leafDist);
        classAttr = data.classAttribute();
    } else {
        Instances[] splitData = splitData(data, splitAttr);
        child = new MyID3[splitAttr.numValues()];
        for (int j = 0; j < splitAttr.numValues(); j++) {
            child[j] = new MyID3();
            child[j].makeTree(splitData[j]);
        }
    }
}

From source file:dewaweebtreeclassifier.Sujeong.java

public void buildTree(Instances instances) throws java.lang.Exception {
    if (instances.numAttributes() < 1) {
        throw new Exception("Data instances need to have minimum of 1 attribute.");
    } else if (instances.numAttributes() == 1) {
        this.value = instances.meanOrMode(instances.classIndex());
    } else {/*from  w w w  .  j av  a2  s. co m*/
        Enumeration attrs = instances.enumerateAttributes();
        double informationGain = 0.0;
        while (attrs.hasMoreElements()) {
            Attribute attr = (Attribute) attrs.nextElement();
            double tmpGain = computeGain(instances, attr);
            if (tmpGain > informationGain) {
                bestAttr = attr;
                informationGain = tmpGain;
            }
        }
        if (bestAttr != null) {
            double mode = instances.meanOrMode(instances.classIndex());
            Instances[] chunks = splitInstancesOnAttribute(instances, bestAttr);
            children = new Sujeong[chunks.length];
            for (int i = 0; i < chunks.length; ++i) {
                Instances chunk = chunks[i];
                Sujeong child = new Sujeong();
                children[i] = child;
                if (chunk.numInstances() > 0)
                    child.buildTree(chunk);
                else
                    child.value = mode;
            }
        } else {
            this.value = instances.meanOrMode(instances.classIndex());
        }
    }
}

From source file:DiversifyQuery.DivTopK.java

public Instances transformData(Instances data) throws Exception {
    ArrayList<LegacyShapelet> shapelets = new ArrayList<>();
    for (int i = 5; i <= 1; i--) {
        if (DResultSet.get(i).result.size() == i) {
            shapelets.addAll(DResultSet.get(i).result);
        }//  w  ww  . ja  v a2  s.  c o m
    }
    if (shapelets.size() < 1) {
        throw new Exception(
                "Number of shapelets initialised incorrectly - please select value of k greater than or equal to 1 (Usage: setNumberOfShapelets");
    }

    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }

    Instances output = determineOutputFormat(data, shapelets);

    // for each data, get distance to each shapelet and create new instance
    for (int i = 0; i < data.numInstances(); i++) { // for each data
        Instance toAdd = new Instance(shapelets.size() + 1);
        int shapeletNum = 0;
        for (LegacyShapelet s : shapelets) {
            double dist = subsequenceDistance(s.content, data.instance(i));
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(shapelets.size(), data.instance(i).classValue());
        output.add(toAdd);
    }
    return output;
}

From source file:DiversifyQuery.DivTopK.java

/**
 * Sets the format of the filtered instances that are output. I.e. will
 * include k attributes each shapelet distance and a class value
 *
 * @param inputFormat the format of the input data
 * @return a new Instances object in the desired output format
 * @throws Exception if all required parameters of the filter are not
 * initialised correctly//  w w  w .ja v  a2 s  .com
 */
protected Instances determineOutputFormat(Instances inputFormat, ArrayList<LegacyShapelet> shapelets)
        throws Exception {

    //Set up instances size and format.
    //int length = this.numShapelets;
    int length = shapelets.size();
    FastVector atts = new FastVector();
    String name;
    for (int i = 0; i < length; i++) {
        name = "Shapelet_" + i;
        atts.addElement(new Attribute(name));
    }

    if (inputFormat.classIndex() >= 0) { //Classification set, set class
        //Get the class values as a fast vector
        Attribute target = inputFormat.attribute(inputFormat.classIndex());

        FastVector vals = new FastVector(target.numValues());
        for (int i = 0; i < target.numValues(); i++) {
            vals.addElement(target.value(i));
        }
        atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals));
    }
    Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts,
            inputFormat.numInstances());
    if (inputFormat.classIndex() >= 0) {
        result.setClassIndex(result.numAttributes() - 1);
    }
    return result;
}

From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java

/**
 * Sets the format of the filtered instances that are output. I.e. will
 * include k attributes each shapelet distance and a class value
 *
 * @param inputFormat the format of the input data
 * @return a new Instances object in the desired output format
 * @throws Exception if all required parameters of the filter are not
 * initialised correctly/* w  w w  . j a v  a 2  s.  c o m*/
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {

    if (this.numShapelets < 1) {
        throw new Exception(
                "ShapeletFilter not initialised correctly - please specify a value of k that is greater than or equal to 1");
    }

    //Set up instances size and format.
    //int length = this.numShapelets;
    int length = this.shapelets.size();
    FastVector atts = new FastVector();
    String name;
    for (int i = 0; i < length; i++) {
        name = "Shapelet_" + i;
        atts.addElement(new Attribute(name));
    }

    if (inputFormat.classIndex() >= 0) { //Classification set, set class
        //Get the class values as a fast vector
        Attribute target = inputFormat.attribute(inputFormat.classIndex());

        FastVector vals = new FastVector(target.numValues());
        for (int i = 0; i < target.numValues(); i++) {
            vals.addElement(target.value(i));
        }
        atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals));
    }
    Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts,
            inputFormat.numInstances());
    if (inputFormat.classIndex() >= 0) {
        result.setClassIndex(result.numAttributes() - 1);
    }
    return result;
}

From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java

@Override
public Instances process(Instances data) throws Exception {
    if (this.numShapelets < 1) {
        throw new Exception(
                "Number of shapelets initialised incorrectly - please select value of k greater than or equal to 1 (Usage: setNumberOfShapelets");
    }//  w w w . j  a  v a  2s  .c o m

    int maxPossibleLength = data.instance(0).numAttributes() - 1;
    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }

    if (this.minShapeletLength < 1 || this.maxShapeletLength < 1
            || this.maxShapeletLength < this.minShapeletLength || this.maxShapeletLength > maxPossibleLength) {
        throw new Exception("Shapelet length parameters initialised incorrectly");
    }

    //Sort data in round robin order
    dataSourceIDs = new int[data.numInstances()];

    for (int i = 0; i < data.numInstances(); i++) {
        dataSourceIDs[i] = i;
    }
    //        data = roundRobinData(data, dataSourceIDs);

    if (this.shapeletsTrained == false) { // shapelets discovery has not yet been caried out, so do so
        this.shapelets = findDiversityTopKShapelets(this.numShapelets, data, this.minShapeletLength,
                this.maxShapeletLength); // get k shapelets ATTENTION
        this.shapeletsTrained = true;
        if (!supressOutput) {
            System.out.println(shapelets.size() + " Shapelets have been generated");
        }
    }

    Instances output = determineOutputFormat(data);

    // for each data, get distance to each shapelet and create new instance
    for (int i = 0; i < data.numInstances(); i++) { // for each data
        Instance toAdd = new Instance(this.shapelets.size() + 1);
        int shapeletNum = 0;
        for (LegacyShapelet s : this.shapelets) {
            double dist = subseqDistance(s.content, data.instance(i));
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(this.shapelets.size(), data.instance(i).classValue());
        output.add(toAdd);
    }
    return output;
}

From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully//from w w w.  j a  va2 s.co m
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i]));
        }
    }
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java

License:Open Source License

public static ClusterContingencyTable evaluateClassifier(final Classifier classifier, final Instances test) {
    try {/*from w w w  . java 2s  . c  o m*/
        final Map<Integer, Set<RealVector>> Umap = new TreeMap<Integer, Set<RealVector>>();
        final Map<Integer, Set<RealVector>> Vmap = new TreeMap<Integer, Set<RealVector>>();

        final Remove rm_filter = new Remove();
        rm_filter.setAttributeIndicesArray(new int[] { test.classIndex() });
        rm_filter.setInputFormat(test);

        for (final Instance i : test) {
            rm_filter.input(i);
            final double[] phi = rm_filter.output().toDoubleArray();
            //            final double[] phi = WekaUtil.unlabeledFeatures( i );

            final int cluster = (int) classifier.classifyInstance(i);
            Set<RealVector> u = Umap.get(cluster);
            if (u == null) {
                u = new HashSet<RealVector>();
                Umap.put(cluster, u);
            }
            u.add(new ArrayRealVector(phi));

            final int true_label = (int) i.classValue();
            Set<RealVector> v = Vmap.get(true_label);
            if (v == null) {
                v = new HashSet<RealVector>();
                Vmap.put(true_label, v);
            }
            v.add(new ArrayRealVector(phi));
        }

        final ArrayList<Set<RealVector>> U = new ArrayList<Set<RealVector>>();
        for (final Map.Entry<Integer, Set<RealVector>> e : Umap.entrySet()) {
            U.add(e.getValue());
        }

        final ArrayList<Set<RealVector>> V = new ArrayList<Set<RealVector>>();
        for (final Map.Entry<Integer, Set<RealVector>> e : Vmap.entrySet()) {
            V.add(e.getValue());
        }

        return new ClusterContingencyTable(U, V);
    } catch (final RuntimeException ex) {
        throw ex;
    } catch (final Exception ex) {
        throw new RuntimeException(ex);
    }
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Returns a list of all Attributes, *excluding* the class attribute if
 * it is set.//from ww  w  .j  a v a 2s.  co m
 * @param instances
 * @return
 */
public static ArrayList<Attribute> extractUnlabeledAttributes(final Instances instances) {
    final ArrayList<Attribute> attributes = new ArrayList<Attribute>(instances.numAttributes());
    for (int i = 0; i < instances.numAttributes(); ++i) {
        if (i == instances.classIndex()) {
            continue;
        }
        attributes.add(instances.attribute(i));
    }
    return attributes;
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Load an ARFF dataset.//from   ww w. j  a v  a 2s  .  co m
 *
 * Adapted from:
 * http://weka.wikispaces.com/Use+WEKA+in+your+Java+code
 * @param file
 * @return
 */
public static Instances readLabeledDataset(final File file) {
    try {
        final DataSource source = new DataSource(file.getPath());
        final Instances data = source.getDataSet();
        // setting class attribute if the data format does not provide this information
        // For example, the XRFF format saves the class attribute information as well
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        return data;
    } catch (final Exception ex) {
        throw new RuntimeException(ex);
    }
}