Example usage for weka.core Instances classAttribute

List of usage examples for weka.core Instances classAttribute

Introduction

On this page you can find example usages of weka.core Instances classAttribute.

Prototype


public Attribute classAttribute()

Source Link

Document

Returns the class attribute.

Usage

From source file:iris.ID3.java

/**
 * Builds this ID3 node from the given instances.
 *
 * The information gain of every enumerated attribute is computed and the
 * attribute with the highest gain is selected. When that gain equals zero the
 * node becomes a leaf holding the normalized class distribution; otherwise one
 * child node is built recursively for each value of the selected attribute.
 *
 * @param instances the instances used to build this node
 */
public void makeLikeAWhat(Instances instances) {
    // One gain slot per attribute, addressed by attribute index
    double[] gainPerAttribute = new double[instances.numAttributes()];
    for (Enumeration attributes = instances.enumerateAttributes(); attributes.hasMoreElements();) {
        Attribute candidate = (Attribute) attributes.nextElement();
        gainPerAttribute[candidate.index()] = infoGain(instances, candidate);
    }

    // The attribute with the largest gain is the split candidate
    highestInfoGain = instances.attribute(Utils.maxIndex(gainPerAttribute));
    boolean nothingLeftToGain = Utils.eq(gainPerAttribute[highestInfoGain.index()], 0);

    if (!nothingLeftToGain) {
        // Inner node: partition on the best attribute and recurse into each bin
        Instances[] partitions = makeBins(instances, highestInfoGain);
        int branchCount = highestInfoGain.numValues();
        children = new ID3[branchCount];
        for (int branch = 0; branch < branchCount; branch++) {
            ID3 subtree = new ID3();
            children[branch] = subtree;
            subtree.makeLikeAWhat(partitions[branch]);
        }
        return;
    }

    // Leaf node: no split attribute, remember the class distribution instead
    highestInfoGain = null;
    maxDistribution = new double[instances.numClasses()];
    for (Enumeration rows = instances.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        maxDistribution[(int) row.classValue()]++;
    }
    // Turn the class counts into proportions
    Utils.normalize(maxDistribution);
    // The predicted class is the most frequent one
    classValue = Utils.maxIndex(maxDistribution);
    classAttribute = instances.classAttribute();
}

From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java

/**
 * Evaluates a classifier with a 10-fold cross-validation and reports the results.
 *
 * The instances are copied, shuffled with a fixed seed (42) and, when the class
 * attribute is nominal, stratified. In every fold the attribute named
 * "isBuggy" is used as the class attribute (index 0 is used when no attribute
 * has that name) and the value "TRUE" is treated as the positive class.
 *
 * The line {@code accuracy;precision;recall;fmeasure;areaUnderROC} is written to
 * {@code baseFolderPath + projectName + "/predictors.csv"}, and a
 * semicolon-separated summary line is printed to standard output.
 *
 * @param baseFolderPath  prefix of the output path
 * @param projectName     project name; part of the output path and of the summary line
 * @param pClassifier     classifier that is rebuilt and evaluated on every fold
 * @param pInstances      data to evaluate on; not modified, a copy is shuffled
 * @param pModelName      model name, only echoed in the summary line
 * @param pClassifierName classifier name, only echoed in the summary line
 * @throws Exception if training, evaluation or opening the output file fails
 */
private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier,
        Instances pInstances, String pModelName, String pClassifierName) throws Exception {

    // other options
    int folds = 10;

    // randomize data
    Random rand = new Random(42);
    Instances randData = new Instances(pInstances);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation
    Evaluation eval = new Evaluation(randData);

    int positiveValueIndexOfClassFeature = 0;
    for (int n = 0; n < folds; n++) {
        Instances train = randData.trainCV(folds, n);
        Instances test = randData.testCV(folds, n);
        // the above code is used by the StratifiedRemoveFolds filter, the
        // code below by the Explorer/Experimenter:
        // Instances train = randData.trainCV(folds, n, rand);

        // locate the "isBuggy" attribute; falls back to index 0 when absent
        int classFeatureIndex = 0;
        for (int i = 0; i < train.numAttributes(); i++) {
            if (train.attribute(i).name().equals("isBuggy")) {
                classFeatureIndex = i;
                break;
            }
        }

        // locate the index of the positive label "TRUE" within the class attribute
        Attribute classFeature = train.attribute(classFeatureIndex);
        for (int i = 0; i < classFeature.numValues(); i++) {
            if (classFeature.value(i).equals("TRUE")) {
                positiveValueIndexOfClassFeature = i;
            }
        }

        train.setClassIndex(classFeatureIndex);
        test.setClassIndex(classFeatureIndex);

        // build and evaluate classifier
        pClassifier.buildClassifier(train);
        eval.evaluateModel(pClassifier, test);
    }

    // read every statistic once; they are reused for the file and the console line
    double truePositives = eval.numTruePositives(positiveValueIndexOfClassFeature);
    double falsePositives = eval.numFalsePositives(positiveValueIndexOfClassFeature);
    double falseNegatives = eval.numFalseNegatives(positiveValueIndexOfClassFeature);
    double trueNegatives = eval.numTrueNegatives(positiveValueIndexOfClassFeature);
    double precision = eval.precision(positiveValueIndexOfClassFeature);
    double recall = eval.recall(positiveValueIndexOfClassFeature);
    double areaUnderRoc = eval.areaUnderROC(positiveValueIndexOfClassFeature);

    double accuracy = (truePositives + trueNegatives)
            / (truePositives + falsePositives + falseNegatives + trueNegatives);
    double fmeasure = 2 * ((precision * recall) / (precision + recall));

    File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv");
    PrintWriter pw1 = new PrintWriter(wekaOutput);
    try {
        pw1.write(accuracy + ";" + precision + ";" + recall + ";" + fmeasure + ";" + areaUnderRoc);
    } finally {
        // PrintWriter buffers its output: without close() the metrics may never
        // reach the file and the file handle would leak
        pw1.close();
    }

    System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";"
            + truePositives + ";" + falsePositives + ";" + falseNegatives + ";" + trueNegatives + ";"
            + accuracy + ";" + precision + ";" + recall + ";" + fmeasure + ";" + areaUnderRoc + "\n");
}

From source file:liac.igmn.loader.DataLoader.java

License:Open Source License

/**
 * Loads a dataset from an ARFF file and binarizes the nominal attributes.
 * Assumes that the class is the last attribute.
 *
 * @param filename path of the file
 * @return dataset
 * @throws DataLoaderException thrown when the file is not found
 * or when an I/O error occurs
 */
public static Dataset loadARFF(String filename) throws DataLoaderException {
    Dataset dataset = new Dataset();
    try {
        ArffLoader loader = new ArffLoader();

        loader.setSource(new File(filename));
        Instances data = loader.getDataSet();
        // Copy taken BEFORE the class index is set on 'data'; the copy is the
        // one that gets filtered and stored in the dataset
        Instances binarized = new Instances(data);

        data.setClassIndex(data.numAttributes() - 1);

        // Collect the labels of the class attribute
        int numClasses = data.numClasses();
        String[] classes = new String[numClasses];
        for (int i = 0; i < numClasses; i++) {
            classes[i] = data.classAttribute().value(i);
        }
        dataset.setClassesNames(classes);

        NominalToBinary filter = new NominalToBinary();
        filter.setInputFormat(binarized);
        filter.setOptions(new String[] { "-A" });
        binarized = Filter.useFilter(binarized, filter);

        // Input size is the number of filtered attributes minus the number of classes
        int inputSize = binarized.numAttributes() - numClasses;

        dataset.setInputSize(inputSize);
        dataset.setNumClasses(numClasses);

        dataset.setWekaDataset(binarized);
    } catch (IOException e) {
        // Chain the exception itself: e.getCause() is frequently null, which
        // would discard the real failure and its stack trace
        throw new DataLoaderException("Arquivo no encontrado", e);
    } catch (Exception e) {
        throw new DataLoaderException("Falha na converso do arquivo", e);
    }

    return dataset;
}

From source file:lu.lippmann.cdb.common.gui.dataset.InstancesLoaderDialogFactory.java

License:Open Source License

/**
 * Shows a file chooser, loads the selected ARFF/CSV file and optionally asks
 * the user which attribute is the class attribute.
 *
 * The path of the selected file is stored in the user preferences under
 * {@code REG_KEY} and is used as the chooser's start location on the next call.
 *
 * @param parent   parent component of the dialogs
 * @param setClass whether the user is asked to pick the class attribute
 * @return the loaded instances, or {@code null} when the file chooser or the
 *         class selection dialog is cancelled
 * @throws Exception if the chooser is approved without a selected file, or if
 *                   loading the file fails
 */
private static Instances showDialog(final Component parent, final boolean setClass) throws Exception {
    final Preferences prefs = Preferences.userRoot().node("CadralDecisionBuild");
    final String lastPath = prefs.get(REG_KEY, WekaDataAccessUtil.DEFAULT_SAMPLE_DIR);

    final JFileChooser chooser = new JFileChooser();
    chooser.setCurrentDirectory(new File(lastPath));
    if (chooser.showOpenDialog(parent) != JFileChooser.APPROVE_OPTION) {
        return null;
    }

    final File chosen = chooser.getSelectedFile();
    if (chosen == null) {
        throw new Exception();
    }

    prefs.put(REG_KEY, chosen.getPath());
    final Instances ds = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(chosen);

    // Remember the class attribute (or the first attribute) before the class index is cleared
    final Attribute defaultClassAttr;
    if (ds.classIndex() >= 0) {
        defaultClassAttr = ds.classAttribute();
    } else {
        defaultClassAttr = ds.attribute(0);
    }
    ds.setClassIndex(-1);
    ds.setRelationName(chosen.getPath());

    final List<String> attributesNames = new ArrayList<String>();
    for (final Enumeration<?> it = ds.enumerateAttributes(); it.hasMoreElements();) {
        attributesNames.add(((Attribute) it.nextElement()).name());
    }

    if (!setClass) {
        ds.setClass(defaultClassAttr);
        return ds;
    }

    final String prompt = "Select the class attribute for '" + chosen.getName() + "' (default:'"
            + defaultClassAttr.name() + "'): ";
    final String picked = (String) JOptionPane.showInputDialog(parent, prompt, "Class selection",
            JOptionPane.QUESTION_MESSAGE, null, // icon
            attributesNames.toArray(), attributesNames.get(attributesNames.size() - 1));

    if (picked == null) {
        // Otherwise no class defined and CACHE attributeClass => No class index defined after cancel + retry
        ds.setClass(defaultClassAttr);
        return null;
    }

    ds.setClass(ds.attribute(picked));
    return ds;
}

From source file:lu.lippmann.cdb.datasetview.DatasetView.java

License:Open Source License

/**
 * Sets the dataset shown by this view and refreshes the dependent UI parts.
 *
 * A class attribute that is not nominal is unset on the given dataset. The
 * first dataset ever passed in is kept as the initial dataset and is used to
 * initialize the completeness tracking.
 *
 * @param pdataSet the dataset to display; its class index may be reset to -1
 * @return this view
 */
public DatasetView setDataSet(final Instances pdataSet) {
    // Only nominal class attributes are kept
    final boolean classIsSet = pdataSet.classIndex() != -1;
    if (classIsSet && !pdataSet.classAttribute().isNominal()) {
        pdataSet.setClassIndex(-1);
    }

    final boolean firstDataSet = this.initialDataSet == null;
    if (firstDataSet) {
        this.initialDataSet = pdataSet;
        this.initialCompleteness = new CompletenessComputer(this.initialDataSet);
        final int cellCount = pdataSet.numInstances() * pdataSet.numAttributes();
        this.dataCompletenessProgressBar.setMaximum(cellCount);
        reinitDataCompleteness();
    }

    this.dataSet = pdataSet;
    if (!filtered) {
        this.notFilteredDataSet = pdataSet;
    }

    updateClassSelectionMenu();
    // The supervised transformations only make sense when a class is defined
    final boolean hasClass = pdataSet.classIndex() != -1;
    this.supervisedTransformPane.setVisible(hasClass);

    for (final TabView tab : tabViews) {
        tab.update(dataSet);
    }

    try {
        updateFiltersPane(dataSet);
    } catch (Exception failure) {
        eventPublisher.publish(new ErrorOccuredEvent("Error when updating filters", failure));
    }

    updateTooltipShowingDatasetDimensions();

    return this;
}

From source file:lu.lippmann.cdb.datasetview.tabs.WeightedMapOfDecisionTreesTabView.java

License:Open Source License

/**
 * {@inheritDoc}/*from w  w  w . j  av a2 s  .c  o  m*/
 */
@Override
public void update0(final Instances dataSet) throws Exception {
    if (this.mp != null)
        this.panel.remove(this.mp);

    if (this.cl != null)
        this.slider.removeChangeListener(cl);
    //if (this.cl!=null) this.slider.removeChangeListener(cl);

    this.cl = new ChangeListener() {
        @Override
        public void stateChanged(final ChangeEvent e) {
            if (!slider.getValueIsAdjusting()) {
                dtFactory = new J48DecisionTreeFactory(slider.getValue() / 100d, false);
                update(dataSet);
            }
        }
    };
    this.slider.addChangeListener(cl);

    final double frameWidth = this.panel.getSize().getWidth() * 0.95d;
    final double frameHeight = this.panel.getSize().getHeight() * 0.95d;

    final ListOrderedMap<JComponent, Integer> mapPanels = new ListOrderedMap<JComponent, Integer>();

    final String oldSelected;
    if (this.attrSelectionCombo.getSelectedItem() == null) {
        oldSelected = dataSet.classAttribute().name();
    } else {
        final Attribute oldAttr = dataSet.attribute(this.attrSelectionCombo.getSelectedItem().toString());
        if (oldAttr != null) {
            oldSelected = oldAttr.name();
        } else {
            oldSelected = dataSet.classAttribute().name();
        }
    }
    final int idx = dataSet.attribute(oldSelected).index();
    final Set<Object> presentValues = WekaDataStatsUtil.getNominalRepartition(dataSet, idx).keySet();
    for (final Object o : presentValues) {
        final Instances part = WekaDataProcessingUtil.filterDataSetOnNominalValue(dataSet, idx, o.toString());
        final DecisionTree dti = dtFactory.buildDecisionTree(part);

        final int ratio = 100 * part.numInstances() / dataSet.numInstances();
        final GraphView myGraph = DecisionTreeToGraphViewHelper.buildGraphView(dti, eventPublisher,
                commandDispatcher);
        myGraph.hideSharedLabel();
        myGraph.addMetaInfo("size=" + dti.getSize(), "");
        myGraph.addMetaInfo("depth=" + dti.getDepth(), "");
        myGraph.addMetaInfo("err=" + FormatterUtil.DECIMAL_FORMAT.format(100d * dti.getErrorRate()) + "%", "");

        final JButton openInEditorButton = new JButton("Edit");
        openInEditorButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                GraphUtil.importDecisionTreeInEditor(dtFactory, part, applicationContext, eventPublisher,
                        commandDispatcher);
            }
        });
        myGraph.addMetaInfoComponent(openInEditorButton);

        myGraph.fitGraphToSubPanel(frameWidth - 10 * presentValues.size(), frameHeight - 10, ratio);
        mapPanels.put((JComponent) myGraph, ratio);

    }
    this.mp = new MultiPanel(mapPanels, (int) frameWidth, (int) frameHeight,
            this.withWeightCheckBox.isSelected());

    this.panel.add(this.mp, BorderLayout.CENTER);

    if (this.attrSelectionCombo.getActionListeners().length > 0) {
        this.attrSelectionCombo.removeActionListener(attrSelectionComboListener);
    }
    if (this.withWeightCheckBox.getActionListeners().length > 0) {
        this.withWeightCheckBox.removeActionListener(attrSelectionComboListener);
    }

    this.attrSelectionCombo.removeAllItems();
    for (final Attribute attr : WekaDataStatsUtil.getNominalAttributesList(dataSet)) {
        this.attrSelectionCombo.addItem(attr.name());
    }
    this.attrSelectionCombo.setSelectedItem(oldSelected);

    this.attrSelectionComboListener = new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            update(dataSet);
        }
    };
    this.attrSelectionCombo.addActionListener(attrSelectionComboListener);
    this.withWeightCheckBox.addActionListener(attrSelectionComboListener);

}

From source file:lu.lippmann.cdb.datasetview.tasks.UnsupervisedFeatureSelectionTask.java

License:Open Source License

/**
 * {@inheritDoc}
 *
 * Keeps the attributes chosen by the unsupervised feature selection plus the
 * class attribute, and returns a new dataset restricted to those attributes.
 *
 * @param dataSet the dataset to reduce
 * @return the reduced dataset, with the same class attribute (looked up by name)
 * @throws Exception if the selection or the filtering fails
 */
@Override
Instances process0(final Instances dataSet) throws Exception {
    // Number of features to keep: asked from input when no ratio is configured
    final int k;
    if (this.ratio != -1) {
        k = (int) Math.round(this.ratio * dataSet.numAttributes());
    } else {
        k = getFeaturesCountFromInput(null, dataSet.numAttributes());
    }

    final List<Integer> attrToKeep = WekaMachineLearningUtil.computeUnsupervisedFeaturesSelection(dataSet, k);
    // The class attribute is always kept
    final int classIdx = dataSet.classIndex();
    if (!attrToKeep.contains(classIdx)) {
        attrToKeep.add(classIdx);
    }
    final int[] keptIndices = ArraysUtil.transform(attrToKeep);

    System.out.println("unsupervised fs -> before=" + dataSet.numAttributes() + " after=" + keptIndices.length);

    final Instances reduced = WekaDataProcessingUtil.buildFilteredByAttributesDataSet(dataSet, keptIndices);
    // Attribute positions may have changed, so the class is found again by name
    final Attribute reducedClassAttr = reduced.attribute(dataSet.classAttribute().name());
    System.out.println(reducedClassAttr + " " + dataSet.classAttribute().name());
    reduced.setClass(reducedClassAttr);
    return reduced;
}

From source file:lu.lippmann.cdb.dt.ModelTreeFactory.java

License:Open Source License

/**
 * Main method: builds an M5P model tree on a sample CSV file, prints the tree,
 * its graph representations and the evaluation results.
 *
 * @param args command line arguments
 */
public static void main(final String[] args) {
    try {
        final String samplePath = "./samples/csv/uci/winequality-white.csv";
        final File sampleFile = new File(samplePath);
        final Instances dataSet = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(sampleFile);
        final boolean numericClass = dataSet.classAttribute().isNumeric();
        System.out.println(numericClass);

        final M5P modelTree = new M5P();
        modelTree.setMinNumInstances(1000);
        modelTree.buildClassifier(dataSet);

        System.out.println(modelTree);

        System.out.println(modelTree.graph());

        final GraphWithOperations gwo = GraphUtil
                .buildGraphWithOperationsFromWekaRegressionString(modelTree.graph());
        System.out.println(gwo);
        final ASCIIGraphDsl dsl = new ASCIIGraphDsl();
        System.out.println(dsl.getDslString(gwo));

        // Evaluate the model on the very data it was trained on
        final Evaluation eval = new Evaluation(dataSet);
        final double[] predictions = eval.evaluateModel(modelTree, dataSet);
        System.out.println("PREDICTED -> " + FormatterUtil.buildStringFromArrayOfDoubles(predictions));

        System.out.println(eval.errorRate());
        System.out.println(eval.sizeOfPredictedRegions());

        System.out.println(eval.toSummaryString("", true));

        final DecisionTree tree = new DecisionTree(gwo, eval.errorRate());
        System.out.println(tree);
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}

From source file:lu.lippmann.cdb.dt.RegressionTreeFactory.java

License:Open Source License

/**
 * Main method./*  ww w  . j  a va 2 s . c  o  m*/
 * @param args command line arguments
 */
public static void main(final String[] args) {
    try {
        final String f = "./samples/csv/uci/winequality-red.csv";
        //final String f="./samples/arff/UCI/crimepredict.arff";
        final Instances dataSet = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File(f));
        System.out.println(dataSet.classAttribute().isNumeric());

        final REPTree rt = new REPTree();
        rt.setMaxDepth(3);
        rt.buildClassifier(dataSet);

        System.out.println(rt);

        //System.out.println(rt.graph());

        final GraphWithOperations gwo = GraphUtil.buildGraphWithOperationsFromWekaRegressionString(rt.graph());
        System.out.println(gwo);
        System.out.println(new ASCIIGraphDsl().getDslString(gwo));

        final Evaluation eval = new Evaluation(dataSet);

        /*Field privateStringField = Evaluation.class.getDeclaredField("m_CoverageStatisticsAvailable");
        privateStringField.setAccessible(true);
        //privateStringField.get
        boolean fieldValue = privateStringField.getBoolean(eval);
        System.out.println("fieldValue = " + fieldValue);*/

        double[] d = eval.evaluateModel(rt, dataSet);
        System.out.println("PREDICTED -> " + FormatterUtil.buildStringFromArrayOfDoubles(d));

        System.out.println(eval.errorRate());
        System.out.println(eval.sizeOfPredictedRegions());

        System.out.println(eval.toSummaryString("", true));

        /*final String f2="./samples/csv/salary.csv";
        final Instances dataSet2=WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File(f2));
                
        final J48 j48=new J48();
        j48.buildClassifier(dataSet2);
        System.out.println(j48.graph());
        final GraphWithOperations gwo2=GraphUtil.buildGraphWithOperationsFromWekaString(j48.graph(),false);
        System.out.println(gwo2);*/

        System.out.println(new DecisionTree(gwo, eval.errorRate()));
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:lu.lippmann.cdb.ext.hydviga.ui.HydroDatasetView.java

License:Open Source License

/**
 * Sets the dataset shown by this view and refreshes the dependent UI parts.
 *
 * A class attribute that is not nominal is unset on the given dataset. The
 * first dataset ever passed in is kept as the initial dataset and is used to
 * initialize the completeness tracking.
 *
 * @param pdataSet the dataset to display; its class index may be reset to -1
 * @return this view
 */
public HydroDatasetView setDataSet(final Instances pdataSet) {
    // Only nominal class attributes are kept
    final boolean classIsSet = pdataSet.classIndex() != -1;
    if (classIsSet && !pdataSet.classAttribute().isNominal()) {
        pdataSet.setClassIndex(-1);
    }

    final boolean firstDataSet = this.initialDataSet == null;
    if (firstDataSet) {
        this.initialDataSet = pdataSet;
        this.initialCompleteness = new CompletenessComputer(this.initialDataSet);
        final int cellCount = pdataSet.numInstances() * pdataSet.numAttributes();
        this.dataCompletenessProgressBar.setMaximum(cellCount);
        reinitDataCompleteness();
    }

    this.dataSet = pdataSet;
    if (!filtered) {
        this.notFilteredDataSet = pdataSet;
    }

    //updateClassSelectionMenu();
    // The supervised transformations only make sense when a class is defined
    final boolean hasClass = pdataSet.classIndex() != -1;
    this.supervisedTransformPane.setVisible(hasClass);

    for (final TabView tab : tabViews) {
        tab.update(dataSet);
    }

    try {
        updateFiltersPane(dataSet);
    } catch (Exception failure) {
        eventPublisher.publish(new ErrorOccuredEvent("Error when updating filters", failure));
    }

    updateTooltipShowingDatasetDimensions();

    return this;
}