List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:id3.MyID3.java
/** * Membagi dataset menurut atribute value * @param data instance/*from www . ja va2 s. co m*/ * @param att atribut input * @return instance hasil split */ public Instances[] splitData(Instances data, Attribute att) { Instances[] instancesSplitBasedAttribute = new Instances[att.numValues()]; for (int i = 0; i < att.numValues(); i++) { instancesSplitBasedAttribute[i] = new Instances(data, data.numInstances()); } for (int i = 0; i < data.numInstances(); i++) { instancesSplitBasedAttribute[(int) data.instance(i).value(att)].add(data.instance(i)); } return instancesSplitBasedAttribute; }
From source file:id3classifier.ID3Classifiers.java
@Override public void buildClassifier(Instances instances) throws Exception { // create list of instances of size instances' number of instances // create list of attributes of size instances' number of attributes List<Instance> instanceList = new ArrayList<>(instances.numInstances()); List<Attribute> attributeList = new ArrayList<>(instances.numAttributes()); // from index 0 to instances' number of instances, add instances' current // instance to the list of instances... mouthfull for (int i = 0; i < instances.numInstances(); i++) { instanceList.add(instances.instance(i)); }//from w ww .j a v a2s . co m // from index 0 to instances' number of attributes, if the index is not // equal to instances' class index... for (int i = 0; i < instances.numAttributes(); i++) { if (i != instances.classIndex()) { // add instances' current attribute to the attribute list attributeList.add(instances.attribute(i)); } } // set tree equal to the tree built by buildTree() using the instance // list and the attribute list tree = buildTree(instanceList, attributeList); }
From source file:id3classifier.Main.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource(file); Instances dataSet = source.getDataSet(); // discretize the dataset Discretize filter = new Discretize(); filter.setInputFormat(dataSet);/* ww w.j a v a 2 s. co m*/ dataSet = Filter.useFilter(dataSet, filter); // standardize the dataset Standardize standardizedData = new Standardize(); standardizedData.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardizedData); // randomize the dataset dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Debug.Random()); // get the sizes of the training and testing sets and split int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances training = new Instances(dataSet, 0, trainingSize); Instances test = new Instances(dataSet, trainingSize, testSize); // set up the ID3 classifier on the training data ID3Classifiers classifier = new ID3Classifiers(); classifier.buildClassifier(training); // set up the evaluation and test using the classifier and test set Evaluation eval = new Evaluation(dataSet); eval.evaluateModel(classifier, test); // outup and kill, important to exit here to stop javaFX System.out.println(eval.toSummaryString("\nResults\n======\n", false)); System.exit(0); }
From source file:id3j48.WekaAccess.java
/**
 * Evaluates a classifier with a percentage split.
 *
 * <p>A copy of {@code data} is shuffled with a fixed seed, the first
 * {@code percentage} percent of the instances (rounded to the nearest whole
 * instance) are used to build the classifier, and the remaining instances are
 * passed to {@code testModel} as the test set.
 *
 * @param data the full data set; it is copied, not modified
 * @param classifier the classifier to build and evaluate
 * @param percentage share of the data used for training, from 0 to 100
 * @return the evaluation returned by {@code testModel}
 * @throws IllegalArgumentException if {@code percentage} is outside 0..100
 * @throws Exception if building or testing the classifier fails
 */
public static Evaluation percentageSplit(Instances data, Classifier classifier, int percentage)
        throws Exception {
    if (percentage < 0 || percentage > 100) {
        throw new IllegalArgumentException(
                "percentage must be between 0 and 100 but was " + percentage);
    }

    Instances tempdata = new Instances(data);
    tempdata.randomize(new Random(1));

    // Compute the share in floating point: the former int expression
    // "numInstances * percentage / 100" truncated before Math.round was
    // applied (making the rounding a no-op) and could overflow for large sets.
    int trainSize = (int) Math.round((double) tempdata.numInstances() * percentage / 100.0);
    int testSize = tempdata.numInstances() - trainSize;

    Instances train = new Instances(tempdata, 0, trainSize);
    Instances test = new Instances(tempdata, trainSize, testSize);

    classifier.buildClassifier(train);
    return testModel(classifier, train, test);
}
From source file:id3j48.WekaAccess.java
/**
 * Labels every instance of an ARFF file with the given classifier.
 *
 * <p>The data is read through {@code readArff}, the last attribute is used as
 * the class, and each instance gets the class value predicted for it. Every
 * instance and its predicted class name are printed to standard output, and
 * the labelled data set is finally written to a file of the same name below
 * {@code classifiedFolder}.
 *
 * @param filename name of the ARFF file to read; also the output file name
 * @param classifier the classifier used to predict the class values
 * @throws Exception if reading, classifying or writing fails
 */
public static void classify(String filename, Classifier classifier) throws Exception {
    Instances input = readArff(filename);
    final int lastAttribute = input.numAttributes() - 1;
    input.setClassIndex(lastAttribute);

    final int total = input.numInstances();
    int index = 0;
    while (index < total) {
        double classLabel = classifier.classifyInstance(input.instance(index));
        input.instance(index).setClassValue(classLabel);
        System.out.println("Instance: " + input.instance(index));
        System.out.println("Class: " + input.classAttribute().value((int) classLabel));
        index++;
    }

    // Write the labelled data set next to the other classified files.
    String target = classifiedFolder + File.separator + filename;
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(target))) {
        writer.write(input.toString());
        writer.newLine();
        writer.flush();
    }
}
From source file:intensityclustering.IntensityClustering.java
/** * Draws the 2D Histogram Plot in the IntensityClustering. X-Axsis is * intensity value of chanel 2 image (where the stained nuclei are). Y-axis * are relative frequencies of present nuclei. * * @param tss The TMAspots whose nuclei are considered (both gold-standard * and estimated nuclei)./*w ww .j a va 2 s .c o m*/ * @param doAlsoClustering If true, the TMApoints are also clustered * according to the histogram. */ void drawNucleiIntensities2D(List<TMAspot> tss, boolean doAlsoClustering) { // draw the plot Plot2DPanel plot; if (((java.awt.BorderLayout) (jPanel9.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) { plot = (Plot2DPanel) ((java.awt.BorderLayout) (jPanel9.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER); plot.removeAllPlots(); plot.removeAllPlotables(); } else { plot = new Plot2DPanel(PlotPanel.SOUTH); plot.setAxisLabels("Intensity", "Frequency"); plot.plotCanvas.setBackground(jPanel9.getBackground()); plot.plotLegend.setBackground(jPanel9.getBackground()); plot.plotToolBar.setBackground(plot.plotCanvas.getBackground()); } if (((java.awt.BorderLayout) (jPanel9.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) { jPanel9.add(plot, java.awt.BorderLayout.CENTER); jPanel15.setBackground(plot.plotCanvas.getBackground()); jPanel15.setVisible(true); validate(); pack(); } if (tss.size() > 0) { try { this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); List<Integer> intensities = new ArrayList<>(); int intensity; int min = Integer.parseInt(jTextField1.getText()); int max = Integer.parseInt(jTextField16.getText()); for (TMAspot ts : tss) { //TODO: GET THE CHANNEL 2 Image //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false); BufferedImage img = ts.getBufferedImage(false); // img can be null if color deconvolution has not been performed, yet. 
if (img != null) { List<TMApoint> tps = ts.getPoints(); for (TMALabel tp : tps) { intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false) .getRed(); if (intensity >= min && intensity <= max) { intensities.add(intensity); } } } } double[] intensities_array = new double[intensities.size()]; for (int i = 0; i < intensities.size(); i++) { intensities_array[i] = intensities.get(i); } int nbins = jSlider7.getValue(); if (intensities_array.length > 0) { plot.addHistogramPlot("TMA points", intensities_array, 0, 256, nbins); } //else { // JOptionPane.showMessageDialog(this, "No TMA points have been found.", "No TMA points found.", JOptionPane.WARNING_MESSAGE); //} //// Cluster Points according to histograms if (doAlsoClustering) { // Find Clusters int n = getParam_nClusters(); // Create ARFF Data FastVector atts; Instances data; int i; // 1. create arff data format atts = new FastVector(1); for (i = 0; i < 1; i++) { atts.addElement(new Attribute(Integer.toString(i))); } // 2. create Instances object data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss)); // 3. fill with data for (i = 0; i < intensities_array.length; i++) { // add the instance Instance inst = new Instance(1.0, new double[] { intensities_array[i] }); inst.setDataset(data); data.add(inst); } // 4. 
set data class index (last attribute is the class) //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X if (tmarker.DEBUG > 4) { java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO, data.toString()); } Clusterer clusterer = getClusterer(); String[] options = getClustererOptions(); if (tmarker.DEBUG > 3) { if (options.length > 0) { String info = "Clusterer should have options:\n"; for (String o : options) { info += o + " "; } info += "\n"; java.util.logging.Logger.getLogger(getClass().getName()) .log(java.util.logging.Level.INFO, info); } } clusterer.setOptions(options); // set the clusterer options clusterer.buildClusterer(data); // build the clusterer // order the clusters according to the brightness // The most bright cluster should be 0, then 1, then 2,... ArrayList<ArrayList<Double>> values = new ArrayList<>(); for (i = 0; i < n; i++) { values.add(new ArrayList<Double>()); } int z; double value; for (i = 0; i < data.numInstances(); i++) { z = clusterer.clusterInstance(data.instance(i)); value = data.instance(i).value(0); values.get(z).add(value); } double[] means = new double[n]; double[] stds = new double[n]; for (i = 0; i < n; i++) { means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()])); stds[i] = Misc.std(values.get(i).toArray(new Double[values.get(i).size()])); } int[] ordering = Misc.orderArray(means, true); for (i = 0; i < n; i++) { int ind = Misc.IndexOf(ordering, i); plot.addPlotable(new Line(getParam_ColorOfClassK(i), new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] }, new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] }, 2 * stds[ind])); plot.addPlot(Plot2DPanel.LINE, "Staining " + i, getParam_ColorOfClassK(i), new double[][] { new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] }, new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] } }); } String clusterInfo = ""; for (String o : clusterer.getOptions()) { clusterInfo += o + " "; } 
clusterInfo += "\n\n"; clusterInfo += clusterer.toString().trim(); if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) { try { clusterInfo += ((HierarchicalClusterer) clusterer).graph(); HierarchyVisualizer a = new HierarchyVisualizer( ((HierarchicalClusterer) clusterer).graph()); a.setSize(800, 600); if (clusterVisualizer == null) { clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram"); clusterVisualizer.setIconImage(getIconImage()); clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE); clusterVisualizer.setSize(800, 600); } Container contentPane = clusterVisualizer.getContentPane(); contentPane.removeAll(); contentPane.add(a); } catch (Exception e) { clusterVisualizer = null; } } jTextArea1.setText(clusterInfo); if (tmarker.DEBUG > 3) { String info = "Clusterer has options\n"; for (String o : clusterer.getOptions()) { info += o + " "; } info += "\n"; info += clusterer.toString() + "\n"; // info += (clusterer).globalInfo() + "\n"; info += "\n"; info += clusterInfo + "\n"; java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO, info); } // cluster all TMAspots and assign the corresponding class to them // Cluster the points List<List<Integer>> clustered_points = new ArrayList<>(); for (i = 0; i < n; i++) { clustered_points.add(new ArrayList<Integer>()); } int k; for (TMAspot ts : tss) { //TODO: GET THE CHANNEL 2 IMAGE //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false); BufferedImage img = ts.getBufferedImage(false); List<TMApoint> tps = ts.getPoints(); for (TMApoint tp : tps) { intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false) .getRed(); // add the instance Instance inst = new Instance(1.0, new double[] { intensity }); inst.setDataset(data); k = ordering[clusterer.clusterInstance(inst)]; // store the color for later visualization clustered_points.get(k).add(intensity); // set the staining of the TMApoint switch (k) { 
case 0: tp.setStaining(TMALabel.STAINING_0); break; case 1: tp.setStaining(TMALabel.STAINING_1); break; case 2: tp.setStaining(TMALabel.STAINING_2); break; default: tp.setStaining(TMALabel.STAINING_3); break; } } ts.dispStainingInfo(); if (manager.getVisibleTMAspot() == ts) { manager.repaintVisibleTMAspot(); } } // Write the description String description = "Nuclei clustered with " + getParam_AutomaticClustererString(); if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) { description += " (" + getParam_HierarchicalClusteringMethod() + ")"; } description += ", n=" + getParam_nClusters() + ", channel 2 intensity."; jLabel42.setText(description); jLabel41.setText(" "); } } catch (Exception e) { e.printStackTrace(); } finally { this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR)); } } }
From source file:intensityclustering.IntensityClustering.java
/** * Clusters the TMApoints on given TMAspots according to their staining * intensity (color). All parameters (e.g. clusterer and parameters) are * selected by the user. Features are simple color features. * * @param tss The TMAspots of which all nuclei (gold-standard and estimated) * are clustered according to color./*w w w. ja va2 s .c o m*/ */ private void clusterPointsAutomaticallyColorSpace(List<TMAspot> tss) { if (tss.size() > 0) { try { this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); int n = getParam_nClusters(); // Create ARFF Data FastVector atts; Instances data; int i; // 1. create arff data format atts = new FastVector(3); for (i = 0; i < 3; i++) { atts.addElement(new Attribute(Integer.toString(i))); } // 2. create Instances object data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss)); // 3. fill with data BufferedImage img; Color c; float[] features = new float[3]; String colorSpace = getParam_ColorSpace(); for (TMAspot ts : tss) { img = ts.getBufferedImage(); List<TMApoint> tps = ts.getPoints(); for (TMApoint tp : tps) { Color2Feature(TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false), colorSpace, features); // add the instance Instance inst = new Instance(1.0, new double[] { features[0], features[1], features[2] }); inst.setDataset(data); data.add(inst); } } // 4. 
set data class index (last attribute is the class) //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X if (tmarker.DEBUG > 4) { java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO, data.toString()); } Clusterer clusterer = getClusterer(); String[] options = getClustererOptions(); if (false && colorSpace.equalsIgnoreCase("hsb")) { String[] newoptions = new String[options.length + 2]; System.arraycopy(options, 0, newoptions, 0, options.length); newoptions[options.length] = "-A"; newoptions[options.length + 1] = "weka.core.MyHSBDistance"; options = newoptions; } if (tmarker.DEBUG > 3) { if (options.length > 0) { String info = "Clusterer should have options\n"; for (String o : options) { info += o + " "; } info += "\n"; java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO, info); } } clusterer.setOptions(options); // set the clusterer options clusterer.buildClusterer(data); // build the clusterer // order the clusters according to the brightness // The most bright cluster should be 0, then 1, then 2,... ArrayList<ArrayList<Double>> values = new ArrayList<>(); for (i = 0; i < clusterer.numberOfClusters(); i++) { values.add(new ArrayList<Double>()); } int z; double value; for (i = 0; i < data.numInstances(); i++) { z = clusterer.clusterInstance(data.instance(i)); value = getParam_ColorSpace().equalsIgnoreCase("hsb") ? 
data.instance(i).value(2) : Misc.RGBToGray(data.instance(i).value(0), data.instance(i).value(1), data.instance(i).value(2)); values.get(z).add(value); } double[] means = new double[clusterer.numberOfClusters()]; for (i = 0; i < clusterer.numberOfClusters(); i++) { means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()])); } int[] ordering = Misc.orderArray(means, !getParam_ColorSpace().equalsIgnoreCase("rtp")); String clusterInfo = ""; for (String o : clusterer.getOptions()) { clusterInfo += o + " "; } clusterInfo += "\n\n"; clusterInfo += clusterer.toString().trim(); if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) { try { clusterInfo += ((HierarchicalClusterer) clusterer).graph(); HierarchyVisualizer a = new HierarchyVisualizer( ((HierarchicalClusterer) clusterer).graph()); a.setSize(800, 600); if (clusterVisualizer == null) { clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram"); clusterVisualizer.setIconImage(getIconImage()); clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE); clusterVisualizer.setSize(800, 600); } Container contentPane = clusterVisualizer.getContentPane(); contentPane.removeAll(); contentPane.add(a); } catch (Exception e) { clusterVisualizer = null; } } jTextArea1.setText(clusterInfo); if (tmarker.DEBUG > 3) { String info = "Clusterer has options\n"; for (String o : clusterer.getOptions()) { info += o + " "; } info += "\n"; info += clusterer.toString() + "\n"; // info += (clusterer).globalInfo() + "\n"; info += "\n"; info += clusterInfo + "\n"; java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO, info); } // cluster all TMAspots and assign the corresponding class to them // Cluster the points List<List<Color>> clustered_points = new ArrayList<>(); for (i = 0; i < clusterer.numberOfClusters(); i++) { clustered_points.add(new ArrayList<Color>()); } int k; for (TMAspot ts : tss) { img = ts.getBufferedImage(); List<TMApoint> tps = 
ts.getPoints(); for (TMApoint tp : tps) { c = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false); Color2Feature(c, colorSpace, features); // add the instance Instance inst = new Instance(1.0, new double[] { features[0], features[1], features[2] }); inst.setDataset(data); k = ordering[clusterer.clusterInstance(inst)]; // store the color for later visualization clustered_points.get(k).add(c); // set the staining of the TMApoint switch (k) { case 0: tp.setStaining(TMALabel.STAINING_0); break; case 1: tp.setStaining(TMALabel.STAINING_1); break; case 2: tp.setStaining(TMALabel.STAINING_2); break; default: tp.setStaining(TMALabel.STAINING_3); break; } } ts.dispStainingInfo(); if (manager.getVisibleTMAspot() == ts) { manager.repaintVisibleTMAspot(); } } // draw the points Plot3DPanel plot; if (((java.awt.BorderLayout) (jPanel2.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) { plot = (Plot3DPanel) ((java.awt.BorderLayout) (jPanel2.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER); plot.removeAllPlots(); } else { plot = new Plot3DPanel(); plot.plotCanvas.setBackground(jPanel2.getBackground()); plot.addLegend(PlotPanel.SOUTH); plot.plotLegend.setBackground(jPanel2.getBackground()); } if (colorSpace.equalsIgnoreCase("hsb")) { plot.setAxisLabels("Hue", "Saturation", "Brightness"); } else if (colorSpace.equalsIgnoreCase("rtp")) { plot.setAxisLabels("R", "Theta", "Phi"); } else { plot.setAxisLabels("Red", "Green", "Blue"); } for (i = 0; i < clusterer.numberOfClusters(); i++) { double[] xs = new double[clustered_points.get(i).size()]; double[] ys = new double[clustered_points.get(i).size()]; double[] zs = new double[clustered_points.get(i).size()]; for (int j = 0; j < clustered_points.get(i).size(); j++) { Color2Feature(clustered_points.get(i).get(j), colorSpace, features); xs[j] = features[0]; ys[j] = features[1]; zs[j] = features[2]; } if (xs.length > 0) { c = getParam_ColorOfClassK(i); plot.addScatterPlot("Staining " 
+ i, c, xs, ys, zs); } } // Write the description String description = "Nuclei clustered with " + getParam_AutomaticClustererString(); if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) { description += " (" + getParam_HierarchicalClusteringMethod() + ")"; } description += ", n=" + getParam_nClusters() + ", color space " + getParam_ColorSpace() + "."; jLabel41.setText(description); jLabel42.setText(" "); if (((java.awt.BorderLayout) (jPanel2.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) { jPanel2.add(plot, java.awt.BorderLayout.CENTER); validate(); pack(); } } catch (Exception | OutOfMemoryError e) { java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.SEVERE, null, e); JOptionPane.showMessageDialog(this, "The clustering could not be performed.\n\n" + "A possible reasons is:\n" + "- Not enough memory (too many points), \n\n" + "You might want to try a different clustering method or less TMAspots.\n\n" + "The error message is: \n" + e.getMessage(), "Error at Nucleus clustering", JOptionPane.WARNING_MESSAGE); } finally { this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR)); } } }
From source file:iris.ID3.java
public double calculateEntropy(Instances instances) { // Array to hold counts for each class double[] numInEachClass = new double[instances.numClasses()]; // Loop through every instance in one bin for (int i = 0; i < instances.numInstances(); i++) { // Increment the count for the class that the instance belongs to numInEachClass[(int) instances.instance(i).classValue()]++; }//from w ww . j a va2 s . c o m // Instantiate the entropy value double entropy = 0; // Loop through number of classes to sum log operations for (int i = 0; i < instances.numClasses(); i++) { // Handle missing data if (numInEachClass[i] > 0) { // Logarithm algorithm for entropy entropy -= (numInEachClass[i] / instances.numInstances()) * Utils.log2(numInEachClass[i] / instances.numInstances()); } } return entropy; }
From source file:iris.ID3.java
public double infoGain(Instances instances, Attribute att) { // Calculate total entropy double infoGain = calculateEntropy(instances); // Create bins Instances[] bins = makeBins(instances, att); // Loop through number of bins in attribute for (int i = 0; i < att.numValues(); i++) { // Applies weight to entropy value infoGain -= ((double) bins[i].numInstances() / (double) instances.numInstances()) * calculateEntropy(bins[i]); }// w ww. jav a 2s. c om return infoGain; }
From source file:iris.ID3.java
private Instances[] makeBins(Instances instances, Attribute att) { // Create array of bins based on numValues in Attribute parameter Instances[] bins = new Instances[att.numValues()]; for (int i = 0; i < att.numValues(); i++) { bins[i] = new Instances(instances, instances.numInstances()); }//from w ww. java 2s.c o m // Create pointer to first instance Enumeration instanceEnum = instances.enumerateInstances(); while (instanceEnum.hasMoreElements()) { // Create new instance from the one pointer is pointing at Instance oneInstance = (Instance) instanceEnum.nextElement(); // Add instance to the proper bin bins[(int) oneInstance.value(att)].add(oneInstance); } // Compactify for (int i = 0; i < bins.length; i++) { bins[i].compactify(); } return bins; }