Example usage for weka.core Instances classIndex

Introduction

In this page you can find the example usage for weka.core Instances classIndex.

Prototype


public int classIndex()

Source Link

Document

Returns the class attribute's index.

Usage

From source file:miRdup.WekaModule.java

License:Open Source License

/**
 * Ranks the attributes of an ARFF data set by information gain and writes
 * the ranking to a file (it is also echoed to standard output).
 *
 * <p>If the data set does not declare a class attribute, the last attribute
 * is used as the class. Any failure is reported via a stack trace on standard
 * error; the method itself never throws.
 *
 * @param arff    the ARFF file to load
 * @param outfile path of the file the ranking is written to
 */
public static void attributeSelection(File arff, String outfile) {
    // try-with-resources guarantees the writer is closed even when loading
    // the data or running the selection throws.
    try (PrintWriter pw = new PrintWriter(new FileWriter(outfile))) {
        // load data
        DataSource source = new DataSource(arff.toString());
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }

        AttributeSelection attrsel = new AttributeSelection();
        weka.attributeSelection.InfoGainAttributeEval eval = new weka.attributeSelection.InfoGainAttributeEval();

        weka.attributeSelection.Ranker rank = new weka.attributeSelection.Ranker();
        rank.setOptions(weka.core.Utils.splitOptions("-T -1.7976931348623157E308 -N -1"));

        // The option dump belongs to the "Model options:" label, so both are
        // emitted only in debug mode.
        if (Main.debug) {
            System.out.print("Model options: " + rank.getClass().getName().trim() + " ");
            for (String s : rank.getOptions()) {
                System.out.print(s + " ");
            }
        }

        attrsel.setEvaluator(eval);
        attrsel.setSearch(rank);
        attrsel.setFolds(10);

        attrsel.SelectAttributes(data);

        // Build the report once and send it to both destinations.
        String results = attrsel.toResultsString();
        System.out.println(results);
        pw.println(results);
        pw.flush();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:ml.dataprocess.CorrelationAttributeEval.java

License:Open Source License

/**
 * Initializes the correlation attribute evaluator. Replaces missing
 * values with means/modes; Deletes instances with missing class values.
 *
 * <p>For every non-class attribute a correlation score with the class is
 * stored in {@code m_correlations}. Nominal attributes (and a nominal class)
 * are binarized into 0/1 indicator vectors, and the absolute correlations of
 * the individual indicators are combined as a frequency-weighted average.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    // work on a copy so the deletions below do not touch the caller's data set
    data = new Instances(data);
    data.deleteWithMissingClass();

    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];
    /*
     * boolean hasNominals = false; boolean hasNumerics = false;
     */
    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and
    // 1 for corresponding instance weights for the 1's)
    // nomAtts[att][value][instance] is the 0/1 indicator "instance has this
    // value of att"; entries for non-nominal attributes stay null
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all
                                             // 1's
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            // iterates over the values actually stored in the instance
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1)
                    // Adding 1 to the stored value and subtracting 1 from value 0
                    // leaves value 0 at 1 when the stored value is itself 0, and
                    // moves the 1 over to the stored value otherwise.
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {

            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;

                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    // number of instances that carry value j
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    // only the strength of the correlation matters, not its sign
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                // average of the per-value correlations, weighted by value frequency
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else {
        // class is nominal
        // TODO extra dimension for storing instance weights too
        // binarizedClasses[k][i] is 1 when instance i belongs to class k
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }

        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;

                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }

                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }

                    sumCorr += classValCounts[j] * corr;
                }
                // average over the classes, weighted by class frequency
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;

                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {

                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k],
                                binarizedClasses[k].length);

                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    // class-frequency-weighted average correlation for value j
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }

                // the weighted average corr for att i as
                // a whole (weighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    // terminate the detailed report with a newline when anything was written
    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}

From source file:moa.classifiers.AbstractClassifier.java

License:Open Source License

/**
 * Translates an attribute index used by the learner into the matching
 * attribute index of a set of instances.
 *
 * @param index the index of the attribute in the learner
 * @param insts the instances
 * @return {@code index} if it lies before the class index of {@code insts},
 *         otherwise {@code index + 1}
 */
protected static int modelAttIndexToInstanceAttIndex(int index, Instances insts) {
    if (index < insts.classIndex()) {
        return index;
    }
    // at or beyond the class position: step over the class attribute
    return index + 1;
}

From source file:moa.classifiers.macros.TACNB.java

License:Open Source License

/**
 * Builds {@code this.header}: the attributes of {@code dataset} preceded by
 * {@code numOldLabelsOption} extra nominal attributes that share the value
 * set of the class attribute and are named {@code <className>_<i>}.
 * The class index of the header is shifted by the number of added attributes.
 *
 * @param dataset the data set whose structure is extended
 */
public void initHeader(Instances dataset) {
    final int labelCount = this.numOldLabelsOption.getValue();
    final Attribute classAttr = dataset.classAttribute();

    // value names of the class attribute, reused for every added label attribute
    final int valueCount = classAttr.numValues();
    List<String> labelValues = new ArrayList<String>();
    int v = 0;
    while (v < valueCount) {
        labelValues.add(classAttr.value(v));
        v++;
    }

    ArrayList<Attribute> extended = new ArrayList<Attribute>(labelCount + dataset.numAttributes());

    // the additional label attributes come first ...
    for (int label = 0; label < labelCount; label++) {
        String labelName = target(classAttr, label);
        extended.add(new Attribute(labelName, labelValues));
    }
    // ... followed by copies of the original attributes
    for (int att = 0; att < dataset.numAttributes(); att++) {
        Attribute copy = (Attribute) dataset.attribute(att).copy();
        extended.add(copy);
    }

    String relation = "extended_" + dataset.relationName();
    this.header = new Instances(relation, extended, 0);
    this.header.setClassIndex(labelCount + dataset.classIndex());
}

/** Returns the name of the {@code i}-th added label attribute. */
private static String target(Attribute classAttr, int i) {
    return classAttr.name() + "_" + i;
}

From source file:moa.classifiers.novelClass.AbstractNovelClassClassifier.java

License:Apache License

/**
 * Creates a new, empty data set with the same attributes as {@code datum},
 * except that the class attribute additionally offers the outlier and novel
 * labels (unless the novel label is already one of its values).
 *
 * @param datum the data set whose structure is copied
 * @return the augmented structure, with the same class index as {@code datum}
 */
final public static Instances augmentInstances(Instances datum) {
    ArrayList<Attribute> attributes = new ArrayList<>(datum.numAttributes());

    for (int pos = 0; pos < datum.numAttributes(); pos++) {
        Attribute original = datum.attribute(pos);
        Attribute copy = original.copy(original.name());

        boolean isClass = pos == datum.classIndex();
        // only extend the class attribute if it does not carry the labels yet
        if (isClass && copy.indexOfValue(NOVEL_LABEL_STR) < 0) {
            int valueCount = copy.numValues();
            List<String> labels = new ArrayList<>(valueCount + 2);
            int idx = 0;
            while (idx < copy.numValues()) {
                labels.add(copy.value(idx));
                ++idx;
            }
            labels.add(OUTLIER_LABEL_STR);
            labels.add(NOVEL_LABEL_STR);
            copy = new Attribute(copy.name(), labels, copy.getMetadata());
        }

        attributes.add(copy);
    }

    Instances augmented = new Instances(
            NOVEL_CLASS_INSTANCE_RELATIONSHIP_TYPE + "-" + datum.relationName(), attributes, 1);
    augmented.setClassIndex(datum.classIndex());
    return augmented;
}

From source file:moa.classifiers.rules.GeRules.java

License:Open Source License

/**
 * Demo entry point: trains a {@code GeRules} learner on every instance of
 * {@code data/cmc.arff}, then prints the 11th instance, the rules covering
 * it, and finally the complete rule list.
 *
 * @param args unused
 * @throws Exception if the data cannot be loaded or training fails
 */
public static void main(String[] args) throws Exception {
    // the ARFF file is read through Weka's DataSource
    DataSource arffSource = new DataSource("data/cmc.arff");

    // stream generator
    RandomTreeGenerator generator = new RandomTreeGenerator();
    generator.numClassesOption.setValue(5);
    generator.numNumericsOption.setValue(0);
    generator.prepareForUse();

    // the rule learner under test
    GeRules learner = new GeRules();
    learner.prepareForUse();

    // load data into instances set
    Instances dataset = arffSource.getDataSet();

    // Fall back to the last attribute as class when the file format does not
    // record one (XRFF, for example, stores the class attribute itself).
    final boolean classMissing = dataset.classIndex() == -1;
    if (classMissing) {
        dataset.setClassIndex(dataset.numAttributes() - 1);
    }

    // feed every instance to the learner, then test it on the same instance
    for (Instance example : Collections.list(dataset.enumerateInstances())) {
        learner.trainOnInstanceImpl(example);
        learner.correctlyClassifies(example);
    }

    // show one sample instance together with the rules that cover it
    Instance sample = Collections.list(dataset.enumerateInstances()).get(10);
    System.out.println(sample);
    for (Rule covering : learner.RulesCoveredInstance(sample)) {
        System.out.println(covering.printRule());
    }

    // dump the full rule list
    for (Rule learned : learner.rulesList) {
        System.out.println(learned.printRule());
    }
}

From source file:moa.clusterers.AbstractClusterer.java

License:Open Source License

/**
 * Converts an attribute index used by the model into the matching attribute
 * index of the given instances.
 *
 * @param index the attribute index in the model
 * @param insts the instances
 * @return {@code index} when it is smaller than the class index of
 *         {@code insts}, otherwise {@code index + 1}
 */
protected static int modelAttIndexToInstanceAttIndex(int index, Instances insts) {
    final int classPosition = insts.classIndex();
    final boolean beforeClass = index < classPosition;
    return beforeClass ? index : index + 1;
}

From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java

/**
 * Creates a copy of the instances and redefines the problem such that the
 * class states whether the base classifier was right: class value 1 when its
 * prediction matches the original class value, 0 when it does not.
 *
 * <p>The share of wrongly and correctly classified instances among the first
 * {@code batchSize} instances is printed to standard output. Any exception
 * is reported on standard error and terminates the JVM.
 *
 * @param data the instances to relabel; the passed data set object is copied first
 * @return the relabelled instances after {@code changeClassToWrongRight}
 */
private Instances redefineProblem(Instances data) {

    Instances redefInstances = new Instances(data); // copy of the instance store

    int oldClassIndex = redefInstances.classIndex();

    try {
        int seen = 0;
        int wrongInBatch = 0;
        int rightInBatch = 0;

        for (weka.core.Instance current : redefInstances) {
            // Caution: this has to stay the *base* classifier!
            double predictedClass = this.baseClassifier.classifyInstance(current);

            boolean correct = predictedClass == current.classValue();
            if (correct) {
                current.setClassValue(1);
            } else {
                current.setClassValue(0);
            }

            // statistics only cover the first batchSize instances
            if (seen < batchSize.getValue()) {
                if (correct) {
                    rightInBatch++;
                } else {
                    wrongInBatch++;
                }
            }
            seen++;
        }

        System.out.println("Patchklassifizierer: " + (float) wrongInBatch / batchSize.getValue() * 100);
        System.out.println("Baselassifizierer: " + (float) rightInBatch / batchSize.getValue() * 100);

        if (this.useBaseClassAsAttribute.isSet()) {
            redefInstances = addBaseClassToInstances(redefInstances);
        }

        redefInstances = changeClassToWrongRight(redefInstances);

    } catch (Exception e) {
        System.err.println("Error while classifying instance in redefineProblem");
        System.err.println(e.getMessage());
        System.err.println(e.fillInStackTrace());
        System.exit(987654);
    }

    return redefInstances;
}

From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java

/**
 * Modify the instances and insert into them the class which the base
 * classifier had them classified as./*www. ja  v  a2s  .com*/
 *
 * @return
 */
private Instances addBaseClassToInstances(Instances origInstances) {

    Instances moddedInstances = new Instances(origInstances); // deep copy

    double predictedClass = 0;

    // create new attribute
    try {
        moddedInstances = copyClassAttribute(moddedInstances, "baseLabel", 1); // das was hier attribute 1 ist, wird zu index 0 
        moddedInstances.setClassIndex(origInstances.classIndex() + 1);
    } catch (Exception e) {
        System.err.println("Error while copying class Attribute for baseLabel");
        System.err.println(e.getMessage());
    }

    Iterator inst = origInstances.iterator();
    int index = 0;
    while (inst.hasNext()) {

        weka.core.Instance a = (weka.core.Instance) inst.next();
        weka.core.Instance target = moddedInstances.instance(index);

        predictedClass = 0;
        try {
            predictedClass = this.baseClassifier.classifyInstance(a); // Achtung: das hier muss "base" bleiben!!
        } catch (Exception e) {
            System.err.println("Error while classifying instance in addBaseClassToInstances");
            System.err.println(a);
            System.err.println(e.getMessage());
        }

        target.setValue(0, predictedClass); // index 0 ist attribute 1 
        index++;
    }

    return moddedInstances;
}

From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java

/**
 * Copies the class attribute to another position (first position)
 *
 * @param instances/*ww w .j  a va  2  s.com*/
 * @param newName
 * @param newAttributeIndex
 * @return
 * @throws Exception
 */
public static Instances copyClassAttribute(Instances instances, String newName, int newAttributeIndex)
        throws Exception {

    int whichAttribute = instances.classIndex();

    Add filter = new Add();
    filter.setAttributeIndex("" + newAttributeIndex);
    filter.setAttributeName(newName);

    // Copy nominal Attribute
    if (instances.attribute(whichAttribute).isNominal()) {
        String newNominalLabels = "";
        Boolean first = true;
        Enumeration<Object> o = instances.attribute(whichAttribute).enumerateValues();
        while (o.hasMoreElements()) {
            String s = (String) o.nextElement();
            if (!first) {
                newNominalLabels += ",";
            }
            newNominalLabels += s;
            first = false;
        }
        filter.setNominalLabels(newNominalLabels);
    }

    filter.setInputFormat(instances);
    instances = Filter.useFilter(instances, filter);
    return instances;
}