List of usage examples for weka.core Instances instance
public Instance instance(int index)
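Before the full examples, here is a minimal, hypothetical sketch of the call this page catalogs: iterating a dataset and reading each Instance by index. The class name and the ARFF path "iris.arff" are placeholders, not taken from any of the projects below.

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstanceIndexDemo {
    public static void main(String[] args) throws Exception {
        // Load any ARFF file; "iris.arff" is a placeholder path.
        Instances data = new DataSource("iris.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i); // the method documented on this page
            System.out.println(i + ": " + inst + " (class value " + inst.classValue() + ")");
        }
    }
}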
From source file:id3.MyID3.java
/**
 * Computes the class distribution of a dataset.
 * @param instances the input instances
 * @return the class distribution counts
 */
public double[] classDistribution(Instances instances) {
    // Count how many instances fall into each class
    double[] distributionClass = new double[instances.numClasses()];
    for (int i = 0; i < instances.numInstances(); i++) {
        distributionClass[(int) instances.instance(i).classValue()]++;
    }
    return distributionClass;
}
From source file:id3.MyID3.java
/**
 * Splits the dataset according to the values of an attribute.
 * @param data the input instances
 * @param att the attribute to split on
 * @return the split subsets, one Instances object per attribute value
 */
public Instances[] splitData(Instances data, Attribute att) {
    Instances[] instancesSplitBasedAttribute = new Instances[att.numValues()];
    for (int i = 0; i < att.numValues(); i++) {
        instancesSplitBasedAttribute[i] = new Instances(data, data.numInstances());
    }
    for (int i = 0; i < data.numInstances(); i++) {
        instancesSplitBasedAttribute[(int) data.instance(i).value(att)].add(data.instance(i));
    }
    return instancesSplitBasedAttribute;
}
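The two MyID3 helpers above are typically combined when scoring a candidate split. The following is a hypothetical driver sketch, not code from MyID3.java; it assumes MyID3 has a no-argument constructor and that the dataset and attribute are already available.

// Hypothetical helper: prints the class distribution of each branch produced by splitData().
static void printSplitDistributions(Instances data, Attribute att) {
    MyID3 myID3 = new MyID3();                                 // assumed no-arg constructor
    Instances[] branches = myID3.splitData(data, att);         // one subset per attribute value
    for (int v = 0; v < branches.length; v++) {
        double[] dist = myID3.classDistribution(branches[v]);  // class counts inside this branch
        System.out.println(att.value(v) + " -> " + java.util.Arrays.toString(dist));
    }
}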
From source file:id3classifier.ID3Classifiers.java
@Override
public void buildClassifier(Instances instances) throws Exception {
    // Create a list of instances sized to the dataset's number of instances,
    // and a list of attributes sized to its number of attributes.
    List<Instance> instanceList = new ArrayList<>(instances.numInstances());
    List<Attribute> attributeList = new ArrayList<>(instances.numAttributes());

    // Copy every instance of the dataset into the instance list.
    for (int i = 0; i < instances.numInstances(); i++) {
        instanceList.add(instances.instance(i));
    }

    // Copy every attribute except the class attribute into the attribute list.
    for (int i = 0; i < instances.numAttributes(); i++) {
        if (i != instances.classIndex()) {
            attributeList.add(instances.attribute(i));
        }
    }

    // Build the decision tree from the instance list and the attribute list.
    tree = buildTree(instanceList, attributeList);
}
From source file:id3j48.WekaAccess.java
public static void classify(String filename, Classifier classifier) throws Exception {
    Instances input = readArff(filename);
    input.setClassIndex(input.numAttributes() - 1);
    for (int i = 0; i < input.numInstances(); i++) {
        double classLabel = classifier.classifyInstance(input.instance(i));
        input.instance(i).setClassValue(classLabel);
        System.out.println("Instance: " + input.instance(i));
        System.out.println("Class: " + input.classAttribute().value((int) classLabel));
    }
    try (BufferedWriter writer = new BufferedWriter(
            new FileWriter(classifiedFolder + File.separator + filename))) {
        writer.write(input.toString());
        writer.newLine();
        writer.flush();
    }
}
From source file:intensityclustering.IntensityClustering.java
/**
 * Draws the 2D histogram plot in the IntensityClustering. The X-axis is the
 * intensity value of the channel 2 image (where the stained nuclei are); the
 * Y-axis shows the relative frequencies of the present nuclei.
 *
 * @param tss The TMAspots whose nuclei are considered (both gold-standard
 * and estimated nuclei).
 * @param doAlsoClustering If true, the TMApoints are also clustered
 * according to the histogram.
 */
void drawNucleiIntensities2D(List<TMAspot> tss, boolean doAlsoClustering) {
    // draw the plot
    Plot2DPanel plot;
    if (((java.awt.BorderLayout) (jPanel9.getLayout()))
            .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) {
        plot = (Plot2DPanel) ((java.awt.BorderLayout) (jPanel9.getLayout()))
                .getLayoutComponent(java.awt.BorderLayout.CENTER);
        plot.removeAllPlots();
        plot.removeAllPlotables();
    } else {
        plot = new Plot2DPanel(PlotPanel.SOUTH);
        plot.setAxisLabels("Intensity", "Frequency");
        plot.plotCanvas.setBackground(jPanel9.getBackground());
        plot.plotLegend.setBackground(jPanel9.getBackground());
        plot.plotToolBar.setBackground(plot.plotCanvas.getBackground());
    }
    if (((java.awt.BorderLayout) (jPanel9.getLayout()))
            .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) {
        jPanel9.add(plot, java.awt.BorderLayout.CENTER);
        jPanel15.setBackground(plot.plotCanvas.getBackground());
        jPanel15.setVisible(true);
        validate();
        pack();
    }
    if (tss.size() > 0) {
        try {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
            List<Integer> intensities = new ArrayList<>();
            int intensity;
            int min = Integer.parseInt(jTextField1.getText());
            int max = Integer.parseInt(jTextField16.getText());
            for (TMAspot ts : tss) {
                //TODO: GET THE CHANNEL 2 Image
                //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false);
                BufferedImage img = ts.getBufferedImage(false);
                // img can be null if color deconvolution has not been performed, yet.
                if (img != null) {
                    List<TMApoint> tps = ts.getPoints();
                    for (TMALabel tp : tps) {
                        intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false)
                                .getRed();
                        if (intensity >= min && intensity <= max) {
                            intensities.add(intensity);
                        }
                    }
                }
            }
            double[] intensities_array = new double[intensities.size()];
            for (int i = 0; i < intensities.size(); i++) {
                intensities_array[i] = intensities.get(i);
            }
            int nbins = jSlider7.getValue();
            if (intensities_array.length > 0) {
                plot.addHistogramPlot("TMA points", intensities_array, 0, 256, nbins);
            }
            //else {
            //    JOptionPane.showMessageDialog(this, "No TMA points have been found.", "No TMA points found.", JOptionPane.WARNING_MESSAGE);
            //}

            //// Cluster Points according to histograms
            if (doAlsoClustering) {
                // Find Clusters
                int n = getParam_nClusters();

                // Create ARFF Data
                FastVector atts;
                Instances data;
                int i;

                // 1. create arff data format
                atts = new FastVector(1);
                for (i = 0; i < 1; i++) {
                    atts.addElement(new Attribute(Integer.toString(i)));
                }

                // 2. create Instances object
                data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss));

                // 3. fill with data
                for (i = 0; i < intensities_array.length; i++) {
                    // add the instance
                    Instance inst = new Instance(1.0, new double[] { intensities_array[i] });
                    inst.setDataset(data);
                    data.add(inst);
                }

                // 4. set data class index (last attribute is the class)
                //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X
                if (tmarker.DEBUG > 4) {
                    java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                            data.toString());
                }
                Clusterer clusterer = getClusterer();
                String[] options = getClustererOptions();
                if (tmarker.DEBUG > 3) {
                    if (options.length > 0) {
                        String info = "Clusterer should have options:\n";
                        for (String o : options) {
                            info += o + " ";
                        }
                        info += "\n";
                        java.util.logging.Logger.getLogger(getClass().getName())
                                .log(java.util.logging.Level.INFO, info);
                    }
                }
                clusterer.setOptions(options); // set the clusterer options
                clusterer.buildClusterer(data); // build the clusterer

                // order the clusters according to the brightness
                // The most bright cluster should be 0, then 1, then 2,...
                ArrayList<ArrayList<Double>> values = new ArrayList<>();
                for (i = 0; i < n; i++) {
                    values.add(new ArrayList<Double>());
                }
                int z;
                double value;
                for (i = 0; i < data.numInstances(); i++) {
                    z = clusterer.clusterInstance(data.instance(i));
                    value = data.instance(i).value(0);
                    values.get(z).add(value);
                }
                double[] means = new double[n];
                double[] stds = new double[n];
                for (i = 0; i < n; i++) {
                    means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()]));
                    stds[i] = Misc.std(values.get(i).toArray(new Double[values.get(i).size()]));
                }
                int[] ordering = Misc.orderArray(means, true);
                for (i = 0; i < n; i++) {
                    int ind = Misc.IndexOf(ordering, i);
                    plot.addPlotable(new Line(getParam_ColorOfClassK(i),
                            new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] },
                            new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] }, 2 * stds[ind]));
                    plot.addPlot(Plot2DPanel.LINE, "Staining " + i, getParam_ColorOfClassK(i),
                            new double[][] {
                                    new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] },
                                    new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] } });
                }
                String clusterInfo = "";
                for (String o : clusterer.getOptions()) {
                    clusterInfo += o + " ";
                }
                clusterInfo += "\n\n";
                clusterInfo += clusterer.toString().trim();
                if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                    try {
                        clusterInfo += ((HierarchicalClusterer) clusterer).graph();
                        HierarchyVisualizer a = new HierarchyVisualizer(
                                ((HierarchicalClusterer) clusterer).graph());
                        a.setSize(800, 600);
                        if (clusterVisualizer == null) {
                            clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram");
                            clusterVisualizer.setIconImage(getIconImage());
                            clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
                            clusterVisualizer.setSize(800, 600);
                        }
                        Container contentPane = clusterVisualizer.getContentPane();
                        contentPane.removeAll();
                        contentPane.add(a);
                    } catch (Exception e) {
                        clusterVisualizer = null;
                    }
                }
                jTextArea1.setText(clusterInfo);
                if (tmarker.DEBUG > 3) {
                    String info = "Clusterer has options\n";
                    for (String o : clusterer.getOptions()) {
                        info += o + " ";
                    }
                    info += "\n";
                    info += clusterer.toString() + "\n";
                    // info += (clusterer).globalInfo() + "\n";
                    info += "\n";
                    info += clusterInfo + "\n";
                    java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                            info);
                }

                // cluster all TMAspots and assign the corresponding class to them
                // Cluster the points
                List<List<Integer>> clustered_points = new ArrayList<>();
                for (i = 0; i < n; i++) {
                    clustered_points.add(new ArrayList<Integer>());
                }
                int k;
                for (TMAspot ts : tss) {
                    //TODO: GET THE CHANNEL 2 IMAGE
                    //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false);
                    BufferedImage img = ts.getBufferedImage(false);
                    List<TMApoint> tps = ts.getPoints();
                    for (TMApoint tp : tps) {
                        intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false)
                                .getRed();
                        // add the instance
                        Instance inst = new Instance(1.0, new double[] { intensity });
                        inst.setDataset(data);
                        k = ordering[clusterer.clusterInstance(inst)];
                        // store the color for later visualization
                        clustered_points.get(k).add(intensity);
                        // set the staining of the TMApoint
                        switch (k) {
                        case 0:
                            tp.setStaining(TMALabel.STAINING_0);
                            break;
                        case 1:
                            tp.setStaining(TMALabel.STAINING_1);
                            break;
                        case 2:
                            tp.setStaining(TMALabel.STAINING_2);
                            break;
                        default:
                            tp.setStaining(TMALabel.STAINING_3);
                            break;
                        }
                    }
                    ts.dispStainingInfo();
                    if (manager.getVisibleTMAspot() == ts) {
                        manager.repaintVisibleTMAspot();
                    }
                }

                // Write the description
                String description = "Nuclei clustered with " + getParam_AutomaticClustererString();
                if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                    description += " (" + getParam_HierarchicalClusteringMethod() + ")";
                }
                description += ", n=" + getParam_nClusters() + ", channel 2 intensity.";
                jLabel42.setText(description);
                jLabel41.setText(" ");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
    }
}
From source file:intensityclustering.IntensityClustering.java
/**
 * Clusters the TMApoints on the given TMAspots according to their staining
 * intensity (color). All parameters (e.g. the clusterer and its options) are
 * selected by the user. Features are simple color features.
 *
 * @param tss The TMAspots of which all nuclei (gold-standard and estimated)
 * are clustered according to color.
 */
private void clusterPointsAutomaticallyColorSpace(List<TMAspot> tss) {
    if (tss.size() > 0) {
        try {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
            int n = getParam_nClusters();

            // Create ARFF Data
            FastVector atts;
            Instances data;
            int i;

            // 1. create arff data format
            atts = new FastVector(3);
            for (i = 0; i < 3; i++) {
                atts.addElement(new Attribute(Integer.toString(i)));
            }

            // 2. create Instances object
            data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss));

            // 3. fill with data
            BufferedImage img;
            Color c;
            float[] features = new float[3];
            String colorSpace = getParam_ColorSpace();
            for (TMAspot ts : tss) {
                img = ts.getBufferedImage();
                List<TMApoint> tps = ts.getPoints();
                for (TMApoint tp : tps) {
                    Color2Feature(TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false),
                            colorSpace, features);
                    // add the instance
                    Instance inst = new Instance(1.0, new double[] { features[0], features[1], features[2] });
                    inst.setDataset(data);
                    data.add(inst);
                }
            }

            // 4. set data class index (last attribute is the class)
            //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X
            if (tmarker.DEBUG > 4) {
                java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                        data.toString());
            }
            Clusterer clusterer = getClusterer();
            String[] options = getClustererOptions();
            if (false && colorSpace.equalsIgnoreCase("hsb")) {
                String[] newoptions = new String[options.length + 2];
                System.arraycopy(options, 0, newoptions, 0, options.length);
                newoptions[options.length] = "-A";
                newoptions[options.length + 1] = "weka.core.MyHSBDistance";
                options = newoptions;
            }
            if (tmarker.DEBUG > 3) {
                if (options.length > 0) {
                    String info = "Clusterer should have options\n";
                    for (String o : options) {
                        info += o + " ";
                    }
                    info += "\n";
                    java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                            info);
                }
            }
            clusterer.setOptions(options); // set the clusterer options
            clusterer.buildClusterer(data); // build the clusterer

            // order the clusters according to the brightness
            // The most bright cluster should be 0, then 1, then 2,...
            ArrayList<ArrayList<Double>> values = new ArrayList<>();
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                values.add(new ArrayList<Double>());
            }
            int z;
            double value;
            for (i = 0; i < data.numInstances(); i++) {
                z = clusterer.clusterInstance(data.instance(i));
                value = getParam_ColorSpace().equalsIgnoreCase("hsb") ? data.instance(i).value(2)
                        : Misc.RGBToGray(data.instance(i).value(0), data.instance(i).value(1),
                                data.instance(i).value(2));
                values.get(z).add(value);
            }
            double[] means = new double[clusterer.numberOfClusters()];
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()]));
            }
            int[] ordering = Misc.orderArray(means, !getParam_ColorSpace().equalsIgnoreCase("rtp"));
            String clusterInfo = "";
            for (String o : clusterer.getOptions()) {
                clusterInfo += o + " ";
            }
            clusterInfo += "\n\n";
            clusterInfo += clusterer.toString().trim();
            if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                try {
                    clusterInfo += ((HierarchicalClusterer) clusterer).graph();
                    HierarchyVisualizer a = new HierarchyVisualizer(
                            ((HierarchicalClusterer) clusterer).graph());
                    a.setSize(800, 600);
                    if (clusterVisualizer == null) {
                        clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram");
                        clusterVisualizer.setIconImage(getIconImage());
                        clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
                        clusterVisualizer.setSize(800, 600);
                    }
                    Container contentPane = clusterVisualizer.getContentPane();
                    contentPane.removeAll();
                    contentPane.add(a);
                } catch (Exception e) {
                    clusterVisualizer = null;
                }
            }
            jTextArea1.setText(clusterInfo);
            if (tmarker.DEBUG > 3) {
                String info = "Clusterer has options\n";
                for (String o : clusterer.getOptions()) {
                    info += o + " ";
                }
                info += "\n";
                info += clusterer.toString() + "\n";
                // info += (clusterer).globalInfo() + "\n";
                info += "\n";
                info += clusterInfo + "\n";
                java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                        info);
            }

            // cluster all TMAspots and assign the corresponding class to them
            // Cluster the points
            List<List<Color>> clustered_points = new ArrayList<>();
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                clustered_points.add(new ArrayList<Color>());
            }
            int k;
            for (TMAspot ts : tss) {
                img = ts.getBufferedImage();
                List<TMApoint> tps = ts.getPoints();
                for (TMApoint tp : tps) {
                    c = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false);
                    Color2Feature(c, colorSpace, features);
                    // add the instance
                    Instance inst = new Instance(1.0, new double[] { features[0], features[1], features[2] });
                    inst.setDataset(data);
                    k = ordering[clusterer.clusterInstance(inst)];
                    // store the color for later visualization
                    clustered_points.get(k).add(c);
                    // set the staining of the TMApoint
                    switch (k) {
                    case 0:
                        tp.setStaining(TMALabel.STAINING_0);
                        break;
                    case 1:
                        tp.setStaining(TMALabel.STAINING_1);
                        break;
                    case 2:
                        tp.setStaining(TMALabel.STAINING_2);
                        break;
                    default:
                        tp.setStaining(TMALabel.STAINING_3);
                        break;
                    }
                }
                ts.dispStainingInfo();
                if (manager.getVisibleTMAspot() == ts) {
                    manager.repaintVisibleTMAspot();
                }
            }

            // draw the points
            Plot3DPanel plot;
            if (((java.awt.BorderLayout) (jPanel2.getLayout()))
                    .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) {
                plot = (Plot3DPanel) ((java.awt.BorderLayout) (jPanel2.getLayout()))
                        .getLayoutComponent(java.awt.BorderLayout.CENTER);
                plot.removeAllPlots();
            } else {
                plot = new Plot3DPanel();
                plot.plotCanvas.setBackground(jPanel2.getBackground());
                plot.addLegend(PlotPanel.SOUTH);
                plot.plotLegend.setBackground(jPanel2.getBackground());
            }
            if (colorSpace.equalsIgnoreCase("hsb")) {
                plot.setAxisLabels("Hue", "Saturation", "Brightness");
            } else if (colorSpace.equalsIgnoreCase("rtp")) {
                plot.setAxisLabels("R", "Theta", "Phi");
            } else {
                plot.setAxisLabels("Red", "Green", "Blue");
            }
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                double[] xs = new double[clustered_points.get(i).size()];
                double[] ys = new double[clustered_points.get(i).size()];
                double[] zs = new double[clustered_points.get(i).size()];
                for (int j = 0; j < clustered_points.get(i).size(); j++) {
                    Color2Feature(clustered_points.get(i).get(j), colorSpace, features);
                    xs[j] = features[0];
                    ys[j] = features[1];
                    zs[j] = features[2];
                }
                if (xs.length > 0) {
                    c = getParam_ColorOfClassK(i);
                    plot.addScatterPlot("Staining " + i, c, xs, ys, zs);
                }
            }

            // Write the description
            String description = "Nuclei clustered with " + getParam_AutomaticClustererString();
            if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                description += " (" + getParam_HierarchicalClusteringMethod() + ")";
            }
            description += ", n=" + getParam_nClusters() + ", color space " + getParam_ColorSpace() + ".";
            jLabel41.setText(description);
            jLabel42.setText(" ");
            if (((java.awt.BorderLayout) (jPanel2.getLayout()))
                    .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) {
                jPanel2.add(plot, java.awt.BorderLayout.CENTER);
                validate();
                pack();
            }
        } catch (Exception | OutOfMemoryError e) {
            java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.SEVERE, null,
                    e);
            JOptionPane.showMessageDialog(this,
                    "The clustering could not be performed.\n\n" + "A possible reason is:\n"
                            + "- Not enough memory (too many points), \n\n"
                            + "You might want to try a different clustering method or fewer TMAspots.\n\n"
                            + "The error message is: \n" + e.getMessage(),
                    "Error at Nucleus clustering", JOptionPane.WARNING_MESSAGE);
        } finally {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
    }
}
From source file:iris.ID3.java
public double calculateEntropy(Instances instances) {
    // Array to hold counts for each class
    double[] numInEachClass = new double[instances.numClasses()];
    // Loop through every instance in one bin
    for (int i = 0; i < instances.numInstances(); i++) {
        // Increment the count for the class that the instance belongs to
        numInEachClass[(int) instances.instance(i).classValue()]++;
    }
    // Instantiate the entropy value
    double entropy = 0;
    // Loop through number of classes to sum log operations
    for (int i = 0; i < instances.numClasses(); i++) {
        // Handle missing data
        if (numInEachClass[i] > 0) {
            // Logarithm algorithm for entropy
            entropy -= (numInEachClass[i] / instances.numInstances())
                    * Utils.log2(numInEachClass[i] / instances.numInstances());
        }
    }
    return entropy;
}
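calculateEntropy is normally used to score a split: the information gain is the parent entropy minus the size-weighted entropy of each bin. The following helper is a hypothetical sketch, not part of iris.ID3; it assumes an ID3 instance and the split bins are already available.

// Hypothetical helper: information gain of a candidate split, using calculateEntropy() above.
static double informationGain(ID3 id3, Instances parent, Instances[] bins) {
    double gain = id3.calculateEntropy(parent);
    for (Instances bin : bins) {
        if (bin.numInstances() > 0) {
            // subtract the size-weighted entropy of each bin
            gain -= ((double) bin.numInstances() / parent.numInstances()) * id3.calculateEntropy(bin);
        }
    }
    return gain;
}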
From source file:iris.Network.java
@Override
public void buildClassifier(Instances trainingSet) throws Exception {
    trainingData = trainingSet;
    // Set the number of inputs to the network to the number of attributes
    // i.e., 4 for the IRIS set
    setInputCount(trainingSet.numAttributes() - 1);
    buildNetwork(inputCount, neuronsInEachLayer);
    List<Double> values = new ArrayList<>();
    // stores the values our output layer gave us after feed forward
    List<Double> finalValues = new ArrayList<>();
    for (int i = 0; i < trainingSet.numInstances(); i++) {
        // Set values of instance
        for (int j = 0; j < trainingSet.instance(i).numAttributes() - 1; j++) {
            values.add(trainingSet.instance(i).value(j));
        }
        // gets the new values of what we calculated for the classification
        // this is probably where we want to loop x amount of times
        for (int k = 0; k < numIterations; k++) {
            getOutputs(values);
            backPropogate(trainingSet.instance(i));
        }
        values.clear(); // reset list
    }
}
From source file:irisdriver.IrisDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // Example arguments: sepallength=5.1 sepalwidth=3.5 petallength=1.4 petalwidth=0.2
    try {
        Hashtable<String, String> values = new Hashtable<String, String>();
        /*Iris irisModel = new Iris();
        for (int i = 0; i < args.length; i++) {
            String[] tokens = args[i].split("=");
            values.put(tokens[0], tokens[1]);
        }
        System.out.println("Classification: " + irisModel.classifySpecies(values));*/

        // Loading the model
        String pathModel = "";
        String pathTestSet = "";
        JFileChooser chooserModel = new JFileChooser();
        chooserModel.setCurrentDirectory(new java.io.File("."));
        chooserModel.setDialogTitle("Choose the model");
        chooserModel.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
        chooserModel.setAcceptAllFileFilterUsed(true);
        if (chooserModel.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
            File filePathModel = chooserModel.getSelectedFile();
            pathModel = filePathModel.getPath();
            Iris irisModel = new Iris(pathModel);

            // Choosing the test set
            JFileChooser chooserTestSet = new JFileChooser();
            chooserTestSet.setDialogTitle("Choose TEST SET");
            chooserTestSet.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
            chooserTestSet.setAcceptAllFileFilterUsed(true);

            // Loading the testing dataset
            if (chooserTestSet.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
                File filePathTestSet = chooserTestSet.getSelectedFile();
                pathTestSet = filePathTestSet.getPath();

                // Writing the output:
                BufferedWriter writer = new BufferedWriter(new FileWriter("D:\\output_file.txt"));

                // Transforming the data set into attribute-value pairs
                ConverterUtils.DataSource unlabeledSource = new ConverterUtils.DataSource(pathTestSet);
                Instances unlabeledData = unlabeledSource.getDataSet();
                if (unlabeledData.classIndex() == -1) {
                    unlabeledData.setClassIndex(unlabeledData.numAttributes() - 1);
                }
                for (int i = 0; i < unlabeledData.numInstances(); i++) {
                    Instance ins = unlabeledData.instance(i);
                    // ins.numAttributes() - 1 --> not to include the label
                    for (int j = 0; j < ins.numAttributes() - 1; j++) {
                        String attrib = ins.attribute(j).name();
                        double val = ins.value(ins.attribute(j));
                        values.put(attrib, String.valueOf(val));
                    }
                    String predictedLabel = irisModel.classifySpecies(values);
                    System.out.println("Classification: " + predictedLabel);
                    values.clear();
                    // Writing the results to a txt file
                    writer.write("The label is: " + predictedLabel);
                    //writer.newLine();
                    //writers.write("The error rate of the prediction is : " + eval.errorRate());
                    //writer.newLine();
                }
                writer.flush();
                writer.close();
            }
        }
    } catch (Exception ex) {
        Logger.getLogger(IrisDriver.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:j48.BinC45Split.java
License:Open Source License
/**
 * Creates split on numeric attribute.
 *
 * @exception Exception if something goes wrong
 */
private void handleNumericAttribute(Instances trainInstances) throws Exception {
    int firstMiss;
    int next = 1;
    int last = 0;
    int index = 0;
    int splitIndex = -1;
    double currentInfoGain;
    double defaultEnt;
    double minSplit;
    Instance instance;
    int i;

    // Current attribute is a numeric attribute.
    m_distribution = new Distribution(2, trainInstances.numClasses());

    // Only Instances with known values are relevant.
    Enumeration enu = trainInstances.enumerateInstances();
    i = 0;
    while (enu.hasMoreElements()) {
        instance = (Instance) enu.nextElement();
        if (instance.isMissing(m_attIndex))
            break;
        m_distribution.add(1, instance);
        i++;
    }
    firstMiss = i;

    // Compute minimum number of Instances required in each subset.
    minSplit = 0.1 * (m_distribution.total()) / ((double) trainInstances.numClasses());
    if (Utils.smOrEq(minSplit, m_minNoObj))
        minSplit = m_minNoObj;
    else if (Utils.gr(minSplit, 25))
        minSplit = 25;

    // Enough Instances with known values?
    if (Utils.sm((double) firstMiss, 2 * minSplit))
        return;

    // Compute values of criteria for all possible split indices.
    defaultEnt = m_infoGainCrit.oldEnt(m_distribution);
    while (next < firstMiss) {
        if (trainInstances.instance(next - 1).value(m_attIndex) + 1e-5 < trainInstances.instance(next)
                .value(m_attIndex)) {

            // Move class values for all Instances up to next possible split point.
            m_distribution.shiftRange(1, 0, trainInstances, last, next);

            // Check if enough Instances in each subset and compute values for criteria.
            if (Utils.grOrEq(m_distribution.perBag(0), minSplit)
                    && Utils.grOrEq(m_distribution.perBag(1), minSplit)) {
                currentInfoGain = m_infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights, defaultEnt);
                if (Utils.gr(currentInfoGain, m_infoGain)) {
                    m_infoGain = currentInfoGain;
                    splitIndex = next - 1;
                }
                index++;
            }
            last = next;
        }
        next++;
    }

    // Was there any useful split?
    if (index == 0)
        return;

    // Compute modified information gain for best split.
    m_infoGain = m_infoGain - (Utils.log2(index) / m_sumOfWeights);
    if (Utils.smOrEq(m_infoGain, 0))
        return;

    // Set instance variables' values to values for best split.
    m_numSubsets = 2;
    m_splitPoint = (trainInstances.instance(splitIndex + 1).value(m_attIndex)
            + trainInstances.instance(splitIndex).value(m_attIndex)) / 2;

    // In case we have a numerical precision problem we need to choose the smaller value
    if (m_splitPoint == trainInstances.instance(splitIndex + 1).value(m_attIndex)) {
        m_splitPoint = trainInstances.instance(splitIndex).value(m_attIndex);
    }

    // Restore distribution for best split.
    m_distribution = new Distribution(2, trainInstances.numClasses());
    m_distribution.addRange(0, trainInstances, 0, splitIndex + 1);
    m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss);

    // Compute modified gain ratio for best split.
    m_gainRatio = m_gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain);
}
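The split-point rule used above can be illustrated in isolation: candidate thresholds lie halfway between consecutive distinct values of the numeric attribute. The helper below is a hypothetical sketch, not part of BinC45Split; data and attIndex are placeholders.

// Hypothetical illustration: enumerate the candidate thresholds searched above,
// using the same 1e-5 tolerance between consecutive distinct values.
static void printCandidateSplitPoints(Instances data, int attIndex) {
    data.sort(attIndex); // order the instances by the given numeric attribute
    for (int i = 0; i < data.numInstances() - 1; i++) {
        double a = data.instance(i).value(attIndex);
        double b = data.instance(i + 1).value(attIndex);
        if (a + 1e-5 < b) {
            System.out.println("Candidate split point: " + (a + b) / 2);
        }
    }
}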