List of usage examples for weka.core Instances instance
public Instance instance(int index)
From source file:classifyfromimage1.java
private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton1ActionPerformed
    // Full pipeline triggered from the UI: pre-process the current ImageJ
    // image, measure particles into a Results table, export the table as CSV,
    // convert it to ARFF, classify every row with a user-chosen serialized
    // Weka model, and report the class composition in a dialog.
    selectWindow(this.name3);
    this.name3 = IJ.getImage().getTitle();
    this.name4 = this.name3.replaceFirst("[.][^.]+$", ""); // image title without extension
    RoiManager rm = RoiManager.getInstance();
    IJ.run("Duplicate...", this.name4);
    IJ.run("Set Measurements...", "area perimeter fit shape limit scientific redirect=None decimal=5");
    selectWindow(this.name3);
    // Background subtraction, contrast equalization, and a sharpening
    // convolution kernel before binarization.
    IJ.run("Subtract Background...", "rolling=1.5");
    IJ.run("Enhance Contrast...", "saturated=25 equalize");
    IJ.run("Subtract Background...", "rolling=1.5");
    IJ.run("Convolve...", "text1=[-1 -3 -4 -3 -1\n-3 0 6 0 -3\n-4 6 50 6 -4\n-3 0 6 0 -3\n-1 -3 -4 -3 -1\n] normalize");
    IJ.run("8-bit", "");
    IJ.run("Restore Selection", "");
    IJ.run("Make Binary", "");
    Prefs.blackBackground = false;
    IJ.run("Convert to Mask", "");
    IJ.run("Restore Selection", "");
    // Particle-size interval typed by the user. NOTE(review): this.text is
    // built here but not used below — presumably consumed elsewhere; confirm.
    this.valor1 = this.interval3.getText();
    this.valor2 = this.interval4.getText();
    this.text = "size=" + this.valor1 + "-" + this.valor2 + " pixel show=Outlines display include summarize add";
    IJ.saveAs("tif", this.name3 + "_processed");
    String dest_filename1, dest_filename2, full; // dest_filename1 and full are never used
    selectWindow("Results");
    //dest_filename1 = this.name2 + "_complete.txt";
    dest_filename2 = this.name3 + "_complete.csv";
    //IJ.saveAs("Results", prova + File.separator + dest_filename1);
    IJ.run("Input/Output...", "jpeg=85 gif=-1 file=.csv copy_row save_column save_row");
    //IJ.saveAs("Results", dir + File.separator + dest_filename2);
    IJ.saveAs("Results", this.name3 + "_complete.csv");
    IJ.run("Restore Selection");
    IJ.run("Clear Results");
    // Convert the exported CSV measurements into an ARFF file so Weka can load them.
    try {
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File(this.name3 + "_complete.csv"));
        Instances data = loader.getDataSet();
        System.out.println(data);
        // save ARFF
        String arffile = this.name3 + ".arff";
        System.out.println(arffile);
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File(arffile));
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex);
    }
    Instances data;
    try {
        data = new Instances(new BufferedReader(new FileReader(this.name3 + ".arff")));
        Instances newData = null;
        Add filter;
        newData = new Instances(data);
        // Append a nominal class attribute; labels and attribute name come
        // from the UI text fields.
        filter = new Add();
        filter.setAttributeIndex("last");
        filter.setNominalLabels(txtlabel.getText());
        filter.setAttributeName(txtpath2.getText());
        filter.setInputFormat(newData);
        newData = Filter.useFilter(newData, filter);
        System.out.print(newData);
        Vector vec = new Vector();
        newData.setClassIndex(newData.numAttributes() - 1);
        // NOTE(review): this compares the dataset's headers with themselves,
        // so the exception is unreachable — probably meant to compare a train
        // set against a test set. Left untouched pending confirmation.
        if (!newData.equalHeaders(newData)) {
            throw new IllegalArgumentException("Train and test are not compatible!");
        }
        // Deserialize the classifier from the path typed into txtpath.
        Classifier cls = (Classifier) weka.core.SerializationHelper.read(txtpath.getText());
        System.out.println("PROVANT MODEL.classifyInstance");
        // Classify every row; collect predicted labels in both vec and classif.
        for (int i = 0; i < newData.numInstances(); i++) {
            double pred = cls.classifyInstance(newData.instance(i));
            double[] dist = cls.distributionForInstance(newData.instance(i));
            System.out.print((i + 1) + " - ");
            System.out.print(newData.classAttribute().value((int) pred) + " - ");
            System.out.println(Utils.arrayToString(dist));
            vec.add(newData.classAttribute().value((int) pred));
            classif.add(newData.classAttribute().value((int) pred));
        }
        classif.removeAll(Arrays.asList("", null)); // drop empty/null labels
        System.out.println(classif);
        String vecstring = "";
        for (Object s : classif) {
            vecstring += s + ",";
            System.out.println("Hola " + vecstring);
        }
        // Histogram of predicted labels.
        Map<String, Integer> seussCount = new HashMap<String, Integer>();
        for (String t : classif) {
            Integer i = seussCount.get(t);
            if (i == null) {
                i = 0;
            }
            seussCount.put(t, i + 1);
        }
        // NOTE(review): vecstring is comma-separated and never contains '$',
        // so counter is always 0; the post-increment inside the println below
        // also looks like a slip. Left untouched pending confirmation.
        String s = vecstring;
        int counter = 0;
        for (int i = 0; i < s.length(); i++) {
            if (s.charAt(i) == '$') {
                counter++;
            }
        }
        System.out.println(seussCount);
        System.out.println("hola " + counter++);
        IJ.showMessage("Your file:" + this.name3 + "arff" + "\n is composed by" + seussCount);
        txtpath2.setText("Your file:" + this.name3 + "arff" + "\n is composed by" + seussCount);
        // Hand the summary over to the A_MachineLearning window.
        A_MachineLearning nf2 = new A_MachineLearning();
        A_MachineLearning.txtresult2.append(this.txtpath2.getText());
        nf2.setVisible(true);
    } catch (Exception ex) {
        Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex);
    }
    // Tear down every ImageJ window the run may have opened.
    IJ.run("Close All", "");
    if (WindowManager.getFrame("Results") != null) {
        IJ.selectWindow("Results");
        IJ.run("Close");
    }
    if (WindowManager.getFrame("Summary") != null) {
        IJ.selectWindow("Summary");
        IJ.run("Close");
    }
    if (WindowManager.getFrame("Results") != null) { // duplicate of the first "Results" check
        IJ.selectWindow("Results");
        IJ.run("Close");
    }
    if (WindowManager.getFrame("ROI Manager") != null) {
        IJ.selectWindow("ROI Manager");
        IJ.run("Close");
    }
    setVisible(false);
    dispose();
}
From source file:SMO.java
License:Open Source License
/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
           MUST be done since condition (8) of Keerthi's paper
           is made with the assertion Ci > 0 (See equation (3a). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }

    // Replace missing attribute values unless checks are disabled.
    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    // Convert nominal attributes to binary ones when the data is not purely numeric.
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }
        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    // Optional attribute standardization / normalization.
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    // Linear kernel iff a PolyKernel with exponent exactly 1.
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers: one BinarySMO per unordered class pair (i, j),
    // each trained on the union of the two class subsets.
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
From source file:SpectralClusterer.java
License:Open Source License
/** * Generates a clusterer by the mean of spectral clustering algorithm. * /*from w w w . jav a 2 s.c o m*/ * @param data * set of instances serving as training data * @exception Exception * if the clusterer has not been generated successfully */ public void buildClusterer(Instances data) throws java.lang.Exception { int n = data.numInstances(); int k = data.numAttributes(); DoubleMatrix2D w; if (useSparseMatrix) w = DoubleFactory2D.sparse.make(n, n); else w = DoubleFactory2D.dense.make(n, n); double[][] v1 = new double[n][]; for (int i = 0; i < n; i++) v1[i] = data.instance(i).toDoubleArray(); v = DoubleFactory2D.dense.make(v1); double sigma_sq = sigma * sigma; // Sets up similarity matrix for (int i = 0; i < n; i++) for (int j = i; j < n; j++) { double dist = distnorm2(v.viewRow(i), v.viewRow(j)); if ((r == -1) || (dist < r)) { double sim = Math.exp(-(dist * dist) / (2 * sigma_sq)); w.set(i, j, sim); w.set(j, i, sim); } } // Partitions points int[][] p = partition(w, alpha_star); // Deploys results numOfClusters = p.length; cluster = new int[n]; for (int i = 0; i < p.length; i++) for (int j = 0; j < p[i].length; j++) cluster[p[i][j]] = i; }
From source file:MachinLearningInterface.java
private void jButton7ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton7ActionPerformed Instances data;//from w w w. j av a 2s.co m try { data = new Instances(new BufferedReader(new FileReader(this.name3 + ".arff"))); Instances newData = null; Add filter; newData = new Instances(data); filter = new Add(); filter.setAttributeIndex("last"); filter.setNominalLabels("rods,punctua,networks"); filter.setAttributeName("target"); filter.setInputFormat(newData); newData = Filter.useFilter(newData, filter); System.out.print(newData); Vector vec = new Vector(); newData.setClassIndex(newData.numAttributes() - 1); if (!newData.equalHeaders(newData)) { throw new IllegalArgumentException("Train and test are not compatible!"); } URL urlToModel = this.getClass().getResource("/" + "Final.model"); InputStream stream = urlToModel.openStream(); Classifier cls = (Classifier) weka.core.SerializationHelper.read(stream); System.out.println("PROVANT MODEL.classifyInstance"); for (int i = 0; i < newData.numInstances(); i++) { double pred = cls.classifyInstance(newData.instance(i)); double[] dist = cls.distributionForInstance(newData.instance(i)); System.out.print((i + 1) + " - "); System.out.print(newData.classAttribute().value((int) pred) + " - "); //txtarea2.setText(Utils.arrayToString(dist)); System.out.println(Utils.arrayToString(dist)); vec.add(newData.classAttribute().value((int) pred)); } int p = 0, n = 0, r = 0; //txtarea2.append(Utils.arrayToString(this.target)); for (Object vec1 : vec) { if ("rods".equals(vec1.toString())) { r = r + 1; } if ("punctua".equals(vec1.toString())) { p = p + 1; } if ("networks".equals(vec1.toString())) { n = n + 1; } PrintWriter out = null; try { out = new PrintWriter(this.name3 + "_morphology.txt"); out.println(vec); out.close(); } catch (Exception ex) { ex.printStackTrace(); } //System.out.println(vec.get(i)); } System.out.println("VECTOR-> punctua: " + p + ", rods: " + r + ", networks: " + n); IJ.showMessage( "Your file:" + 
this.name3 + "arff" + "\nhas been analysed, and it is composed by-> punctua: " + p + ", rods: " + r + ", networks: " + n); } catch (IOException ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } IJ.showMessage("analysing complete "); }
From source file:MachinLearningInterface.java
private void jButton10ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton10ActionPerformed
    // Classifies "<name3>.arff" twice — first with the user-selected model
    // (this.model), then with the bundled Final.model — appending both passes'
    // predicted labels to one list, then reports the combined per-class counts
    // and writes the labels to "<name3>_morphology.txt".
    //
    // Fixes vs. original: the resource InputStream is now closed
    // (try-with-resources), the PrintWriter is opened once instead of being
    // re-created on every loop iteration (final file content is identical),
    // the unreachable newData.equalHeaders(newData) self-comparison was
    // removed, and the raw Vector got a type parameter.
    try {
        Instances data = new Instances(new BufferedReader(new FileReader(this.name3 + ".arff")));

        // Append a nominal class attribute "target" with user-provided labels.
        Add filter = new Add();
        filter.setAttributeIndex("last");
        filter.setNominalLabels(this.liststring);
        filter.setAttributeName("target");
        Instances newData = new Instances(data);
        filter.setInputFormat(newData);
        newData = Filter.useFilter(newData, filter);
        System.out.print(newData);
        newData.setClassIndex(newData.numAttributes() - 1);

        Vector<String> vec = new Vector<String>();

        // First pass: model chosen by the user.
        Classifier cls = (Classifier) weka.core.SerializationHelper.read(this.model);
        System.out.println("PROVANT MODEL.classifyInstance");
        for (int i = 0; i < newData.numInstances(); i++) {
            double pred = cls.classifyInstance(newData.instance(i));
            double[] dist = cls.distributionForInstance(newData.instance(i));
            String label = newData.classAttribute().value((int) pred);
            System.out.print((i + 1) + " - ");
            System.out.print(label + " - ");
            System.out.println(Utils.arrayToString(dist));
            vec.add(label);
        }

        // Second pass: bundled Final.model; predictions are appended to the
        // same list, so the counts below cover both classifiers (as before).
        Classifier cls2;
        URL urlToModel = this.getClass().getResource("/" + "Final.model");
        try (InputStream stream = urlToModel.openStream()) {
            cls2 = (Classifier) weka.core.SerializationHelper.read(stream);
        }
        System.out.println("PROVANT MODEL.classifyInstance");
        for (int i = 0; i < newData.numInstances(); i++) {
            double pred = cls2.classifyInstance(newData.instance(i));
            double[] dist = cls2.distributionForInstance(newData.instance(i));
            String label = newData.classAttribute().value((int) pred);
            System.out.print((i + 1) + " - ");
            System.out.print(label + " - ");
            System.out.println(Utils.arrayToString(dist));
            vec.add(label);
        }

        // Tally the three known morphology classes.
        int p = 0, n = 0, r = 0;
        for (String label : vec) {
            if ("rods".equals(label)) {
                r = r + 1;
            }
            if ("punctua".equals(label)) {
                p = p + 1;
            }
            if ("networks".equals(label)) {
                n = n + 1;
            }
        }

        // Write the full prediction list once; best-effort like the original.
        try (PrintWriter out = new PrintWriter(this.name3 + "_morphology.txt")) {
            out.println(vec);
        } catch (Exception ex) {
            ex.printStackTrace();
        }

        System.out.println("VECTOR-> punctua: " + p + ", rods: " + r + ", networks: " + n);
        IJ.showMessage("Your file:" + this.name3 + "arff"
                + "\nhas been analysed, and it is composed by-> punctua: " + p + ", rods: " + r + ", networks: " + n);
    } catch (Exception ex) {
        // Covers the original separate IOException branch as well; handling was identical.
        Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex);
    }
    IJ.showMessage("analysing complete ");
}
From source file:MPCKMeans.java
License:Open Source License
/** * Cluster given instances to form the specified number of clusters. * * @param data instances to be clustered * @param numClusters number of clusters to create * @exception Exception if something goes wrong. *///from ww w .ja v a 2 s .c om public void buildClusterer(Instances data, int numClusters) throws Exception { m_NumClusters = numClusters; System.out.println("Creating " + m_NumClusters + " clusters"); m_Initializer.setNumClusters(m_NumClusters); if (data.instance(0) instanceof SparseInstance) { m_isSparseInstance = true; } buildClusterer(data); }
From source file:MPCKMeans.java
License:Open Source License
/**
 * Clusters unlabeledData and labeledData (with labels removed),
 * using constraints in labeledPairs to initialize
 *
 * @param labeledPairs labeled pairs to be used to initialize
 * @param unlabeledData unlabeled instances
 * @param labeledData labeled instances
 * @param numClusters number of clusters
 * @param startingIndexOfTest starting index of test set in unlabeled data
 * @exception Exception if something goes wrong.
 */
public void buildClusterer(ArrayList labeledPairs, Instances unlabeledData, Instances labeledData,
        int numClusters, int startingIndexOfTest) throws Exception {
    m_TotalTrainWithLabels = labeledData;

    if (labeledPairs != null) {
        // Sized so the default 0.75 load factor avoids rehashing.
        m_SeedHash = new HashSet((int) (unlabeledData.numInstances() / 0.75 + 10));
        m_ConstraintsHash = new HashMap();
        m_instanceConstraintHash = new HashMap();

        for (int i = 0; i < labeledPairs.size(); i++) {
            InstancePair pair = (InstancePair) labeledPairs.get(i);
            Integer firstInt = new Integer(pair.first);
            Integer secondInt = new Integer(pair.second);

            // for first point
            if (!m_SeedHash.contains(firstInt)) { // add instances with constraints to seedHash
                if (m_verbose) {
                    System.out.println("Adding " + firstInt + " to seedHash");
                }
                m_SeedHash.add(firstInt);
            }

            // for second point
            if (!m_SeedHash.contains(secondInt)) {
                m_SeedHash.add(secondInt);
                if (m_verbose) {
                    System.out.println("Adding " + secondInt + " to seedHash");
                }
            }

            // Pairs must arrive ordered (first < second).
            if (pair.first >= pair.second) {
                throw new Exception("Ordering reversed - something wrong!!");
            } else {
                InstancePair newPair = null;
                newPair = new InstancePair(pair.first, pair.second, InstancePair.DONT_CARE_LINK);
                m_ConstraintsHash.put(newPair, new Integer(pair.linkType)); // WLOG first < second
                if (m_verbose) {
                    System.out.println(
                            "Adding constraint (" + pair.first + "," + pair.second + "), " + pair.linkType);
                }

                // hash the constraints for the instances involved
                Object constraintList1 = m_instanceConstraintHash.get(firstInt);
                if (constraintList1 == null) {
                    ArrayList constraintList = new ArrayList();
                    constraintList.add(pair);
                    m_instanceConstraintHash.put(firstInt, constraintList);
                } else {
                    ((ArrayList) constraintList1).add(pair);
                }
                Object constraintList2 = m_instanceConstraintHash.get(secondInt);
                if (constraintList2 == null) {
                    ArrayList constraintList = new ArrayList();
                    constraintList.add(pair);
                    m_instanceConstraintHash.put(secondInt, constraintList);
                } else {
                    ((ArrayList) constraintList2).add(pair);
                }
            }
        }
    }

    m_StartingIndexOfTest = startingIndexOfTest;
    if (m_verbose) {
        System.out.println("Starting index of test: " + m_StartingIndexOfTest);
    }

    // learn metric using labeled data,
    // then cluster both the labeled and unlabeled data
    System.out.println("Initializing metric: " + m_metric);
    m_metric.buildMetric(unlabeledData);
    m_metricBuilt = true;
    m_metricLearner.setMetric(m_metric);
    m_metricLearner.setClusterer(this);

    // normalize all data for SPKMeans
    if (m_metric.doesNormalizeData()) {
        for (int i = 0; i < unlabeledData.numInstances(); i++) {
            m_metric.normalizeInstanceWeighted(unlabeledData.instance(i));
        }
    }

    // either create a new metric if multiple metrics,
    // or just point them all to m_metric
    m_metrics = new LearnableMetric[numClusters];
    m_metricLearners = new MPCKMeansMetricLearner[numClusters];
    for (int i = 0; i < m_metrics.length; i++) {
        if (m_useMultipleMetrics) {
            m_metrics[i] = (LearnableMetric) m_metric.clone();
            m_metricLearners[i] = (MPCKMeansMetricLearner) m_metricLearner.clone();
            m_metricLearners[i].setMetric(m_metrics[i]);
            m_metricLearners[i].setClusterer(this);
        } else {
            m_metrics[i] = m_metric;
            m_metricLearners[i] = m_metricLearner;
        }
    }

    buildClusterer(unlabeledData, numClusters);
}
From source file:MPCKMeans.java
License:Open Source License
/**
 * Generates a clusterer. Instances in data have to be
 * either all sparse or all non-sparse
 *
 * @param data set of instances serving as training data
 * @exception Exception if the clusterer has not been
 *                generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
    System.out.println("ML weight=" + m_MLweight);
    System.out.println("CL weight= " + m_CLweight);
    System.out.println("LOG term weight=" + m_logTermWeight);
    System.out.println("Regularizer weight= " + m_regularizerTermWeight);
    m_RandomNumberGenerator = new Random(m_RandomSeed);

    if (m_metric instanceof OfflineLearnableMetric) {
        m_isOfflineMetric = true;
    } else {
        m_isOfflineMetric = false;
    }

    // Don't rebuild the metric if it was already trained
    if (!m_metricBuilt) {
        m_metric.buildMetric(data);
        m_metricBuilt = true;
        m_metricLearner.setMetric(m_metric);
        m_metricLearner.setClusterer(this);
        // One metric/learner slot per cluster; either independent clones or
        // all aliases of the shared metric.
        m_metrics = new LearnableMetric[m_NumClusters];
        m_metricLearners = new MPCKMeansMetricLearner[m_NumClusters];
        for (int i = 0; i < m_metrics.length; i++) {
            if (m_useMultipleMetrics) {
                m_metrics[i] = (LearnableMetric) m_metric.clone();
                m_metricLearners[i] = (MPCKMeansMetricLearner) m_metricLearner.clone();
                m_metricLearners[i].setMetric(m_metrics[i]);
                m_metricLearners[i].setClusterer(this);
            } else {
                m_metrics[i] = m_metric;
                m_metricLearners[i] = m_metricLearner;
            }
        }
    }

    setInstances(data);
    m_ClusterCentroids = new Instances(m_Instances, m_NumClusters);
    m_ClusterAssignments = new int[m_Instances.numInstances()];

    // NOTE(review): '&&' only rejects data containing BOTH nominal and string
    // attributes, yet the message mentions nominal only — '||' may have been
    // intended. Left untouched pending confirmation.
    if (m_Instances.checkForNominalAttributes() && m_Instances.checkForStringAttributes()) {
        throw new UnsupportedAttributeTypeException("Cannot handle nominal attributes\n");
    }

    m_ClusterCentroids = m_Initializer.initialize();

    // if all instances are smoothed by the metric, the centroids
    // need to be smoothed too (note that this is independent of
    // centroid smoothing performed by K-Means)
    if (m_metric instanceof InstanceConverter) {
        System.out.println("Converting centroids...");
        Instances convertedCentroids = new Instances(m_ClusterCentroids, m_NumClusters);
        for (int i = 0; i < m_ClusterCentroids.numInstances(); i++) {
            Instance centroid = m_ClusterCentroids.instance(i);
            convertedCentroids.add(((InstanceConverter) m_metric).convertInstance(centroid));
        }

        // Replace centroid contents in place with the converted copies.
        m_ClusterCentroids.delete();
        for (int i = 0; i < convertedCentroids.numInstances(); i++) {
            m_ClusterCentroids.add(convertedCentroids.instance(i));
        }
    }

    System.out.println("Done initializing clustering ...");
    getIndexClusters();

    if (m_verbose && m_Seedable) {
        printIndexClusters();
        for (int i = 0; i < m_NumClusters; i++) {
            System.out.println("Centroid " + i + ": " + m_ClusterCentroids.instance(i));
        }
    }

    // Some extra work for smoothing metrics
    if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing()) {
        SmoothingMetric smoothingMetric = (SmoothingMetric) m_metric;
        Instances smoothedCentroids = new Instances(m_Instances, m_NumClusters);

        for (int i = 0; i < m_ClusterCentroids.numInstances(); i++) {
            Instance smoothedCentroid = smoothingMetric.smoothInstance(m_ClusterCentroids.instance(i));
            smoothedCentroids.add(smoothedCentroid);
        }
        m_ClusterCentroids = smoothedCentroids;

        updateSmoothingMetrics();
    }

    runKMeans();
}
From source file:MPCKMeans.java
License:Open Source License
/** Sets training instances */ public void setInstances(Instances instances) { m_Instances = instances;/*w w w. j a va2s . c om*/ // create the checksum coefficients m_checksumCoeffs = new double[instances.numAttributes()]; for (int i = 0; i < m_checksumCoeffs.length; i++) { m_checksumCoeffs[i] = m_RandomNumberGenerator.nextDouble(); } // hash the instance checksums m_checksumHash = new HashMap(instances.numInstances()); int classIdx = instances.classIndex(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.instance(i); double[] values = instance.toDoubleArray(); double checksum = 0; for (int j = 0; j < values.length; j++) { if (j != classIdx) { checksum += m_checksumCoeffs[j] * values[j]; } } // take care of chaining Object list = m_checksumHash.get(new Double((float) checksum)); ArrayList idxList = null; if (list == null) { idxList = new ArrayList(); m_checksumHash.put(new Double((float) checksum), idxList); } else { // chaining idxList = (ArrayList) list; } idxList.add(new Integer(i)); } }
From source file:MPCKMeans.java
License:Open Source License
public static void testCase() {
    // Smoke test / demo driver for MPCKMeans: loads a dataset from a
    // hard-coded path, builds pairwise must-link/cannot-link constraints,
    // runs semi-supervised clustering, and prints pairwise evaluation scores.
    try {
        String dataset = new String("lowd");
        //String dataset = new String("highd");
        if (dataset.equals("lowd")) {
            //////// Low-D data
            // String datafile = "/u/ml/data/bio/arffFromPhylo/ecoli_K12-100.arff";
            // String datafile = "/u/sugato/weka/data/digits-0.1-389.arff";
            String datafile = "/u/sugato/weka/data/iris.arff";
            int numPairs = 200, num = 0; // num is unused in this branch

            // set up the data
            FileReader reader = new FileReader(datafile);
            Instances data = new Instances(reader);

            // Make the last attribute be the class
            int classIndex = data.numAttributes() - 1;
            data.setClassIndex(classIndex); // starts with 0
            System.out.println("ClassIndex is: " + classIndex);

            // Remove the class labels before clustering
            Instances clusterData = new Instances(data);
            clusterData.deleteClassAttribute();

            // create the pairs
            ArrayList labeledPair = InstancePair.getPairs(data, numPairs);
            System.out.println("Finished initializing constraint matrix");

            MPCKMeans mpckmeans = new MPCKMeans();
            mpckmeans.setUseMultipleMetrics(false);
            System.out.println("\nClustering the data using MPCKmeans...\n");

            WeightedEuclidean metric = new WeightedEuclidean();
            WEuclideanLearner metricLearner = new WEuclideanLearner();
            // Alternative metric/learner combinations kept for reference:
            // LearnableMetric metric = new WeightedDotP();
            // MPCKMeansMetricLearner metricLearner = new DotPGDLearner();
            // KL metric = new KL();
            // KLGDLearner metricLearner = new KLGDLearner();
            // ((KL)metric).setUseIDivergence(true);
            // BarHillelMetric metric = new BarHillelMetric();
            // BarHillelMetricMatlab metric = new BarHillelMetricMatlab();
            // XingMetric metric = new XingMetric();
            // WeightedMahalanobis metric = new WeightedMahalanobis();

            mpckmeans.setMetric(metric);
            mpckmeans.setMetricLearner(metricLearner);
            mpckmeans.setVerbose(false);
            mpckmeans.setRegularize(false);
            mpckmeans.setTrainable(new SelectedTag(TRAINING_INTERNAL, TAGS_TRAINING));
            mpckmeans.setSeedable(true);
            mpckmeans.buildClusterer(labeledPair, clusterData, data, data.numClasses(), data.numInstances());
            mpckmeans.getIndexClusters();
            mpckmeans.printIndexClusters();

            // Evaluate against the held labels (pairwise precision/recall/F-measure).
            SemiSupClustererEvaluation eval = new SemiSupClustererEvaluation(mpckmeans.m_TotalTrainWithLabels,
                    mpckmeans.m_TotalTrainWithLabels.numClasses(), mpckmeans.m_TotalTrainWithLabels.numClasses());
            eval.evaluateModel(mpckmeans, mpckmeans.m_TotalTrainWithLabels, mpckmeans.m_Instances);
            System.out.println("MI=" + eval.mutualInformation());
            System.out.print("FM=" + eval.pairwiseFMeasure());
            System.out.print("\tP=" + eval.pairwisePrecision());
            System.out.print("\tR=" + eval.pairwiseRecall());
        } else if (dataset.equals("highd")) {
            //////// Newsgroup data
            String datafile = "/u/ml/users/sugato/groupcode/weka335/data/arffFromCCS/sanitized/different-1000_sanitized.arff";
            //String datafile = "/u/ml/users/sugato/groupcode/weka335/data/20newsgroups/small-newsgroup_fromCCS.arff";
            //String datafile = "/u/ml/users/sugato/groupcode/weka335/data/20newsgroups/same-100_fromCCS.arff";

            // set up the data
            FileReader reader = new FileReader(datafile);
            Instances data = new Instances(reader);

            // Make the last attribute be the class
            int classIndex = data.numAttributes() - 1;
            data.setClassIndex(classIndex); // starts with 0
            System.out.println("ClassIndex is: " + classIndex);

            // Remove the class labels before clustering
            Instances clusterData = new Instances(data);
            clusterData.deleteClassAttribute();

            // create the pairs by sampling random index pairs; the link type is
            // derived from whether the two instances share a class label.
            // NOTE(review): numPairs is 0 here, so the sampling loop never runs.
            int numPairs = 0, num = 0;
            ArrayList labeledPair = new ArrayList(numPairs);
            Random rand = new Random(42);
            System.out.println("Initializing constraint matrix:");
            while (num < numPairs) {
                int i = (int) (data.numInstances() * rand.nextFloat());
                int j = (int) (data.numInstances() * rand.nextFloat());
                int first = (i < j) ? i : j;
                int second = (i >= j) ? i : j;
                int linkType = (data.instance(first).classValue() == data.instance(second).classValue())
                        ? InstancePair.MUST_LINK
                        : InstancePair.CANNOT_LINK;
                InstancePair pair = new InstancePair(first, second, linkType);
                if (first != second && !labeledPair.contains(pair)) {
                    labeledPair.add(pair);
                    //System.out.println(num + "th entry is: " + pair);
                    num++;
                }
            }
            System.out.println("Finished initializing constraint matrix");

            MPCKMeans mpckmeans = new MPCKMeans();
            mpckmeans.setUseMultipleMetrics(false);
            System.out.println("\nClustering the highd data using MPCKmeans...\n");

            LearnableMetric metric = new WeightedDotP();
            MPCKMeansMetricLearner metricLearner = new DotPGDLearner();
            // KL metric = new KL();
            // KLGDLearner metricLearner = new KLGDLearner();

            mpckmeans.setMetric(metric);
            mpckmeans.setMetricLearner(metricLearner);
            mpckmeans.setVerbose(false);
            mpckmeans.setRegularize(true);
            mpckmeans.setTrainable(new SelectedTag(TRAINING_INTERNAL, TAGS_TRAINING));
            mpckmeans.setSeedable(true);
            mpckmeans.buildClusterer(labeledPair, clusterData, data, data.numClasses(), data.numInstances());
            mpckmeans.getIndexClusters();

            SemiSupClustererEvaluation eval = new SemiSupClustererEvaluation(mpckmeans.m_TotalTrainWithLabels,
                    mpckmeans.m_TotalTrainWithLabels.numClasses(), mpckmeans.m_TotalTrainWithLabels.numClasses());
            mpckmeans.getMetric().resetMetric(); // Vital: to reset m_attrWeights to 1 for proper normalization
            eval.evaluateModel(mpckmeans, mpckmeans.m_TotalTrainWithLabels, mpckmeans.m_Instances);
            System.out.println("MI=" + eval.mutualInformation());
            System.out.print("FM=" + eval.pairwiseFMeasure());
            System.out.print("\tP=" + eval.pairwisePrecision());
            System.out.print("\tR=" + eval.pairwiseRecall());
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}