List of usage examples for weka.core Instances classAttribute
public Attribute classAttribute()
From source file:iris.ID3.java
public void makeLikeAWhat(Instances instances) { // Create storage for different info gains double[] infoGains = new double[instances.numAttributes()]; // Enumerate through attributes to find the best gain Enumeration attributeEnum = instances.enumerateAttributes(); while (attributeEnum.hasMoreElements()) { // Loop through attributes, adding gain to infoGains array Attribute att = (Attribute) attributeEnum.nextElement(); infoGains[att.index()] = infoGain(instances, att); }//from ww w . ja v a 2 s. c om // Use maxIndex to find the highest info gain in the array highestInfoGain = instances.attribute(Utils.maxIndex(infoGains)); // Make a leaf if there is no more info to gain // Otherwise, create children // Check if there is no more info to gain if (Utils.eq(infoGains[highestInfoGain.index()], 0)) { highestInfoGain = null; // Instantiate maxDistribution maxDistribution = new double[instances.numClasses()]; // Set up enumerator for instances Enumeration instanceEnum = instances.enumerateInstances(); // Tally classes while (instanceEnum.hasMoreElements()) { Instance instance = (Instance) instanceEnum.nextElement(); maxDistribution[(int) instance.classValue()]++; } // Normalize data for easier manipulation Utils.normalize(maxDistribution); // Get the max index of the distrubtion classValue = Utils.maxIndex(maxDistribution); // Save class attribute classAttribute = instances.classAttribute(); } // Create children else { // Split best attribute into bins Instances[] bins = makeBins(instances, highestInfoGain); // Create nodes children = new ID3[highestInfoGain.numValues()]; for (int i = 0; i < highestInfoGain.numValues(); i++) { children[i] = new ID3(); children[i].makeLikeAWhat(bins[i]); } } }
From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java
private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier, Instances pInstances, String pModelName, String pClassifierName) throws Exception { // other options int folds = 10; // randomize data Random rand = new Random(42); Instances randData = new Instances(pInstances); randData.randomize(rand);//from w ww.ja v a2 s. c om if (randData.classAttribute().isNominal()) { randData.stratify(folds); } // perform cross-validation and add predictions Instances predictedData = null; Evaluation eval = new Evaluation(randData); int positiveValueIndexOfClassFeature = 0; for (int n = 0; n < folds; n++) { Instances train = randData.trainCV(folds, n); Instances test = randData.testCV(folds, n); // the above code is used by the StratifiedRemoveFolds filter, the // code below by the Explorer/Experimenter: // Instances train = randData.trainCV(folds, n, rand); int classFeatureIndex = 0; for (int i = 0; i < train.numAttributes(); i++) { if (train.attribute(i).name().equals("isBuggy")) { classFeatureIndex = i; break; } } Attribute classFeature = train.attribute(classFeatureIndex); for (int i = 0; i < classFeature.numValues(); i++) { if (classFeature.value(i).equals("TRUE")) { positiveValueIndexOfClassFeature = i; } } train.setClassIndex(classFeatureIndex); test.setClassIndex(classFeatureIndex); // build and evaluate classifier pClassifier.buildClassifier(train); eval.evaluateModel(pClassifier, test); // add predictions // AddClassification filter = new AddClassification(); // filter.setClassifier(pClassifier); // filter.setOutputClassification(true); // filter.setOutputDistribution(true); // filter.setOutputErrorFlag(true); // filter.setInputFormat(train); // Filter.useFilter(train, filter); // Instances pred = Filter.useFilter(test, filter); // if (predictedData == null) // predictedData = new Instances(pred, 0); // // for (int j = 0; j < pred.numInstances(); j++) // predictedData.add(pred.instance(j)); } double accuracy = 
(eval.numTruePositives(positiveValueIndexOfClassFeature) + eval.numTrueNegatives(positiveValueIndexOfClassFeature)) / (eval.numTruePositives(positiveValueIndexOfClassFeature) + eval.numFalsePositives(positiveValueIndexOfClassFeature) + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + eval.numTrueNegatives(positiveValueIndexOfClassFeature)); double fmeasure = 2 * ((eval.precision(positiveValueIndexOfClassFeature) * eval.recall(positiveValueIndexOfClassFeature)) / (eval.precision(positiveValueIndexOfClassFeature) + eval.recall(positiveValueIndexOfClassFeature))); File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv"); PrintWriter pw1 = new PrintWriter(wekaOutput); pw1.write(accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";" + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";" + eval.areaUnderROC(positiveValueIndexOfClassFeature)); System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";" + eval.numTruePositives(positiveValueIndexOfClassFeature) + ";" + eval.numFalsePositives(positiveValueIndexOfClassFeature) + ";" + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + ";" + eval.numTrueNegatives(positiveValueIndexOfClassFeature) + ";" + accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";" + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";" + eval.areaUnderROC(positiveValueIndexOfClassFeature) + "\n"); }
From source file:liac.igmn.loader.DataLoader.java
License:Open Source License
/**
 * Loads a dataset from an ARFF file and binarizes its nominal attributes.
 * Assumes that the class is the last attribute.
 *
 * @param filename path of the file
 * @return dataset
 * @throws DataLoaderException thrown when the file is not found
 *                             or when an I/O or conversion error occurs
 */
public static Dataset loadARFF(String filename) throws DataLoaderException {
    Dataset dataset = new Dataset();
    try {
        ArffLoader loader = new ArffLoader();
        loader.setSource(new File(filename));
        Instances data = loader.getDataSet();

        // The copy is taken before the class index is assigned on `data`; it is the
        // copy that goes through the binarization filter.
        Instances binarized = new Instances(data);
        data.setClassIndex(data.numAttributes() - 1);

        String[] classes = new String[data.numClasses()];
        for (int i = 0; i < data.numClasses(); i++) {
            classes[i] = data.classAttribute().value(i);
        }
        dataset.setClassesNames(classes);

        // Options must be configured before setInputFormat, because the output
        // format is derived from them when the input format is set.
        NominalToBinary filter = new NominalToBinary();
        filter.setOptions(new String[] { "-A" });
        filter.setInputFormat(binarized);
        binarized = Filter.useFilter(binarized, filter);

        int inputSize = binarized.numAttributes() - data.numClasses();
        dataset.setInputSize(inputSize);
        dataset.setNumClasses(data.numClasses());
        dataset.setWekaDataset(binarized);
    } catch (IOException e) {
        // Pass the exception itself: e.getCause() is frequently null and loses the failure.
        throw new DataLoaderException("Arquivo no encontrado", e);
    } catch (Exception e) {
        throw new DataLoaderException("Falha na converso do arquivo", e);
    }
    return dataset;
}
From source file:lu.lippmann.cdb.common.gui.dataset.InstancesLoaderDialogFactory.java
License:Open Source License
private static Instances showDialog(final Component parent, final boolean setClass) throws Exception { final Preferences prefs = Preferences.userRoot().node("CadralDecisionBuild"); final String path = prefs.get(REG_KEY, WekaDataAccessUtil.DEFAULT_SAMPLE_DIR); final JFileChooser fc = new JFileChooser(); fc.setCurrentDirectory(new File(path)); final int returnVal = fc.showOpenDialog(parent); if (returnVal == JFileChooser.APPROVE_OPTION) { final File file = fc.getSelectedFile(); if (file != null) { prefs.put(REG_KEY, file.getPath()); final Instances ds = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(file); final Attribute defaultClassAttr = ds.classIndex() >= 0 ? ds.classAttribute() : ds.attribute(0); ds.setClassIndex(-1);//from w w w . j a v a 2 s.c om ds.setRelationName(file.getPath()); final List<String> attributesNames = new ArrayList<String>(); final Enumeration<?> e = ds.enumerateAttributes(); while (e.hasMoreElements()) { final Attribute attr = (Attribute) e.nextElement(); attributesNames.add(attr.name()); } if (setClass) { final String s = (String) JOptionPane.showInputDialog(parent, "Select the class attribute for '" + file.getName() + "' (default:'" + defaultClassAttr.name() + "'): ", "Class selection", JOptionPane.QUESTION_MESSAGE, null, // icon attributesNames.toArray(), attributesNames.get(attributesNames.size() - 1)); if (s != null) { ds.setClass(ds.attribute(s)); } else { //Otherwise no class defined and CACHE attributeClass => No class index defined after cancel + retry ds.setClass(defaultClassAttr); return null; } } else { ds.setClass(defaultClassAttr); } return ds; } else throw new Exception(); } else return null; }
From source file:lu.lippmann.cdb.datasetview.DatasetView.java
License:Open Source License
/**
 * Installs the given dataset in this view and refreshes the dependent widgets.
 *
 * <p>A class attribute that is not nominal is unset on the passed dataset. The first dataset
 * ever installed is also kept as the initial dataset, from which the completeness
 * indicators are initialised.
 *
 * @param pdataSet the dataset to display; its class index may be reset to -1
 * @return this view
 */
public DatasetView setDataSet(final Instances pdataSet) {
    // Only nominal classes are kept.
    final boolean classAssigned = pdataSet.classIndex() != -1;
    if (classAssigned && !pdataSet.classAttribute().isNominal()) {
        pdataSet.setClassIndex(-1);
    }

    // First dataset seen: use it as the reference for completeness.
    if (this.initialDataSet == null) {
        this.initialDataSet = pdataSet;
        this.initialCompleteness = new CompletenessComputer(this.initialDataSet);
        final int cellCount = pdataSet.numInstances() * pdataSet.numAttributes();
        this.dataCompletenessProgressBar.setMaximum(cellCount);
        reinitDataCompleteness();
    }

    this.dataSet = pdataSet;
    if (!filtered) {
        this.notFilteredDataSet = pdataSet;
    }

    updateClassSelectionMenu();

    // The supervised transformations only make sense when a class is defined.
    final boolean hasClass = pdataSet.classIndex() != -1;
    this.supervisedTransformPane.setVisible(hasClass);

    for (final TabView tab : tabViews) {
        tab.update(dataSet);
    }

    try {
        updateFiltersPane(dataSet);
    } catch (Exception e) {
        eventPublisher.publish(new ErrorOccuredEvent("Error when updating filters", e));
    }

    updateTooltipShowingDatasetDimensions();
    return this;
}
From source file:lu.lippmann.cdb.datasetview.tabs.WeightedMapOfDecisionTreesTabView.java
License:Open Source License
/** * {@inheritDoc}/*from w w w . j av a2 s .c o m*/ */ @Override public void update0(final Instances dataSet) throws Exception { if (this.mp != null) this.panel.remove(this.mp); if (this.cl != null) this.slider.removeChangeListener(cl); //if (this.cl!=null) this.slider.removeChangeListener(cl); this.cl = new ChangeListener() { @Override public void stateChanged(final ChangeEvent e) { if (!slider.getValueIsAdjusting()) { dtFactory = new J48DecisionTreeFactory(slider.getValue() / 100d, false); update(dataSet); } } }; this.slider.addChangeListener(cl); final double frameWidth = this.panel.getSize().getWidth() * 0.95d; final double frameHeight = this.panel.getSize().getHeight() * 0.95d; final ListOrderedMap<JComponent, Integer> mapPanels = new ListOrderedMap<JComponent, Integer>(); final String oldSelected; if (this.attrSelectionCombo.getSelectedItem() == null) { oldSelected = dataSet.classAttribute().name(); } else { final Attribute oldAttr = dataSet.attribute(this.attrSelectionCombo.getSelectedItem().toString()); if (oldAttr != null) { oldSelected = oldAttr.name(); } else { oldSelected = dataSet.classAttribute().name(); } } final int idx = dataSet.attribute(oldSelected).index(); final Set<Object> presentValues = WekaDataStatsUtil.getNominalRepartition(dataSet, idx).keySet(); for (final Object o : presentValues) { final Instances part = WekaDataProcessingUtil.filterDataSetOnNominalValue(dataSet, idx, o.toString()); final DecisionTree dti = dtFactory.buildDecisionTree(part); final int ratio = 100 * part.numInstances() / dataSet.numInstances(); final GraphView myGraph = DecisionTreeToGraphViewHelper.buildGraphView(dti, eventPublisher, commandDispatcher); myGraph.hideSharedLabel(); myGraph.addMetaInfo("size=" + dti.getSize(), ""); myGraph.addMetaInfo("depth=" + dti.getDepth(), ""); myGraph.addMetaInfo("err=" + FormatterUtil.DECIMAL_FORMAT.format(100d * dti.getErrorRate()) + "%", ""); final JButton openInEditorButton = new JButton("Edit"); 
openInEditorButton.addActionListener(new ActionListener() { @Override public void actionPerformed(ActionEvent e) { GraphUtil.importDecisionTreeInEditor(dtFactory, part, applicationContext, eventPublisher, commandDispatcher); } }); myGraph.addMetaInfoComponent(openInEditorButton); myGraph.fitGraphToSubPanel(frameWidth - 10 * presentValues.size(), frameHeight - 10, ratio); mapPanels.put((JComponent) myGraph, ratio); } this.mp = new MultiPanel(mapPanels, (int) frameWidth, (int) frameHeight, this.withWeightCheckBox.isSelected()); this.panel.add(this.mp, BorderLayout.CENTER); if (this.attrSelectionCombo.getActionListeners().length > 0) { this.attrSelectionCombo.removeActionListener(attrSelectionComboListener); } if (this.withWeightCheckBox.getActionListeners().length > 0) { this.withWeightCheckBox.removeActionListener(attrSelectionComboListener); } this.attrSelectionCombo.removeAllItems(); for (final Attribute attr : WekaDataStatsUtil.getNominalAttributesList(dataSet)) { this.attrSelectionCombo.addItem(attr.name()); } this.attrSelectionCombo.setSelectedItem(oldSelected); this.attrSelectionComboListener = new ActionListener() { @Override public void actionPerformed(ActionEvent e) { update(dataSet); } }; this.attrSelectionCombo.addActionListener(attrSelectionComboListener); this.withWeightCheckBox.addActionListener(attrSelectionComboListener); }
From source file:lu.lippmann.cdb.datasetview.tasks.UnsupervisedFeatureSelectionTask.java
License:Open Source License
/**
 * {@inheritDoc}
 *
 * <p>Keeps {@code k} attributes chosen by the unsupervised feature selection, where
 * {@code k} is asked from the user when {@code ratio} is -1 and otherwise is
 * {@code ratio * numAttributes} rounded to the nearest integer. When the dataset has a class
 * attribute it is always kept and is set as the class of the returned dataset; a dataset
 * without class is filtered as is and returned without class.
 */
@Override
Instances process0(final Instances dataSet) throws Exception {
    final int k;
    if (this.ratio == -1) {
        k = getFeaturesCountFromInput(null, dataSet.numAttributes());
    } else {
        k = (int) Math.round(this.ratio * dataSet.numAttributes());
    }

    final List<Integer> attrToKeep = WekaMachineLearningUtil.computeUnsupervisedFeaturesSelection(dataSet, k);

    // A class index of -1 means "no class": it must neither be added to the kept
    // attribute indices nor be dereferenced through classAttribute().
    final boolean hasClass = dataSet.classIndex() >= 0;
    if (hasClass && !attrToKeep.contains(dataSet.classIndex())) {
        attrToKeep.add(dataSet.classIndex());
    }

    final int[] array = ArraysUtil.transform(attrToKeep);
    System.out.println("unsupervised fs -> before=" + dataSet.numAttributes() + " after=" + array.length);

    final Instances newds = WekaDataProcessingUtil.buildFilteredByAttributesDataSet(dataSet, array);
    if (hasClass) {
        // Attribute indices change after filtering, so the class is looked up by name.
        final Attribute clsAttr = newds.attribute(dataSet.classAttribute().name());
        System.out.println(clsAttr + " " + dataSet.classAttribute().name());
        newds.setClass(clsAttr);
    }
    return newds;
}
From source file:lu.lippmann.cdb.dt.ModelTreeFactory.java
License:Open Source License
/** * Main method.// w ww . j a va2 s . c o m * @param args command line arguments */ public static void main(final String[] args) { try { //final String f="./samples/csv/uci/winequality-red-simplified.csv"; final String f = "./samples/csv/uci/winequality-white.csv"; //final String f="./samples/arff/UCI/crimepredict.arff"; final Instances dataSet = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File(f)); System.out.println(dataSet.classAttribute().isNumeric()); final M5P rt = new M5P(); //rt.setUnpruned(true); rt.setMinNumInstances(1000); rt.buildClassifier(dataSet); System.out.println(rt); System.out.println(rt.graph()); final GraphWithOperations gwo = GraphUtil.buildGraphWithOperationsFromWekaRegressionString(rt.graph()); System.out.println(gwo); System.out.println(new ASCIIGraphDsl().getDslString(gwo)); final Evaluation eval = new Evaluation(dataSet); /*Field privateStringField = Evaluation.class.getDeclaredField("m_CoverageStatisticsAvailable"); privateStringField.setAccessible(true); //privateStringField.get boolean fieldValue = privateStringField.getBoolean(eval); System.out.println("fieldValue = " + fieldValue);*/ double[] d = eval.evaluateModel(rt, dataSet); System.out.println("PREDICTED -> " + FormatterUtil.buildStringFromArrayOfDoubles(d)); System.out.println(eval.errorRate()); System.out.println(eval.sizeOfPredictedRegions()); System.out.println(eval.toSummaryString("", true)); System.out.println(new DecisionTree(gwo, eval.errorRate())); } catch (Exception e) { e.printStackTrace(); } }
From source file:lu.lippmann.cdb.dt.RegressionTreeFactory.java
License:Open Source License
/** * Main method./* ww w . j a va 2 s . c o m*/ * @param args command line arguments */ public static void main(final String[] args) { try { final String f = "./samples/csv/uci/winequality-red.csv"; //final String f="./samples/arff/UCI/crimepredict.arff"; final Instances dataSet = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File(f)); System.out.println(dataSet.classAttribute().isNumeric()); final REPTree rt = new REPTree(); rt.setMaxDepth(3); rt.buildClassifier(dataSet); System.out.println(rt); //System.out.println(rt.graph()); final GraphWithOperations gwo = GraphUtil.buildGraphWithOperationsFromWekaRegressionString(rt.graph()); System.out.println(gwo); System.out.println(new ASCIIGraphDsl().getDslString(gwo)); final Evaluation eval = new Evaluation(dataSet); /*Field privateStringField = Evaluation.class.getDeclaredField("m_CoverageStatisticsAvailable"); privateStringField.setAccessible(true); //privateStringField.get boolean fieldValue = privateStringField.getBoolean(eval); System.out.println("fieldValue = " + fieldValue);*/ double[] d = eval.evaluateModel(rt, dataSet); System.out.println("PREDICTED -> " + FormatterUtil.buildStringFromArrayOfDoubles(d)); System.out.println(eval.errorRate()); System.out.println(eval.sizeOfPredictedRegions()); System.out.println(eval.toSummaryString("", true)); /*final String f2="./samples/csv/salary.csv"; final Instances dataSet2=WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File(f2)); final J48 j48=new J48(); j48.buildClassifier(dataSet2); System.out.println(j48.graph()); final GraphWithOperations gwo2=GraphUtil.buildGraphWithOperationsFromWekaString(j48.graph(),false); System.out.println(gwo2);*/ System.out.println(new DecisionTree(gwo, eval.errorRate())); } catch (Exception e) { e.printStackTrace(); } }
From source file:lu.lippmann.cdb.ext.hydviga.ui.HydroDatasetView.java
License:Open Source License
public HydroDatasetView setDataSet(final Instances pdataSet) { if (pdataSet.classIndex() != -1 && !pdataSet.classAttribute().isNominal()) pdataSet.setClassIndex(-1);// w w w. jav a 2 s. c om if (this.initialDataSet == null) { this.initialDataSet = pdataSet; this.initialCompleteness = new CompletenessComputer(this.initialDataSet); this.dataCompletenessProgressBar.setMaximum(pdataSet.numInstances() * pdataSet.numAttributes()); reinitDataCompleteness(); } this.dataSet = pdataSet; if (!filtered) this.notFilteredDataSet = pdataSet; //updateClassSelectionMenu(); this.supervisedTransformPane.setVisible(pdataSet.classIndex() != -1); for (final TabView tv : tabViews) { tv.update(dataSet); } try { updateFiltersPane(dataSet); } catch (Exception e) { eventPublisher.publish(new ErrorOccuredEvent("Error when updating filters", e)); } updateTooltipShowingDatasetDimensions(); return this; }