List of usage examples for weka.clusterers SimpleKMeans clusterInstance
@Override public int clusterInstance(Instance instance) throws Exception
From source file: analysis.Purity.java
/** * /*from w ww .j av a 2s.co m*/ * @param k number of clusters * @param originalfile original data * @param imputedfile imputed data * @throws Exception */ public void findpurity(int k, String originalfile, String imputedfile) throws Exception { //get original data ConverterUtils.DataSource source = new ConverterUtils.DataSource(originalfile); // get imputed data ConverterUtils.DataSource mysource = new ConverterUtils.DataSource(imputedfile); //get instances for clustering this.instances = source.getDataSet(); this.myinstances = mysource.getDataSet(); //Simple Kmeans for clustering SimpleKMeans globalkmeans = new SimpleKMeans(); SimpleKMeans mykmeans = new SimpleKMeans(); //set number of clusters globalkmeans.setNumClusters(k); mykmeans.setNumClusters(k); // build clusters globalkmeans.buildClusterer(instances); mykmeans.buildClusterer(myinstances); // to compare clusters create matrix for original data and imputed data // this matrix indicates the instances in the came clusters original = new Matrix(instances.numInstances(), k); imputed = new Matrix(myinstances.numInstances(), k); // get cluster numbers for each instance and initialize associated cluster value to 1 for (int i = 0; i < myinstances.numInstances(); i++) { //System.out.println(instances.instance(i)); original.set(i, globalkmeans.clusterInstance(instances.instance(i)), 1); imputed.set(i, mykmeans.clusterInstance(myinstances.instance(i)), 1); } System.out.println("k is: \t" + original.getColumnDimension()); //System.out.println(imputed.getRowDimension()); original = original.times(original.transpose()); imputed = imputed.times(imputed.transpose()); int total1 = 0;// to count instances in the imputed data in the same cluster int total2 = 0; // to count instances in the original data in the same cluster //int value = 1; for (int i = 0; i < original.getRowDimension(); i++) { for (int j = i; j < original.getColumnDimension(); j++) { if ((original.get(i, j) == 1)) { if (imputed.get(i, j) == 1) { total1++; 
// if i and j th instance in the same cluster in the imputed data } total2++;// if the i and j th instance in the same cluster in the original data } } //System.out.println(); } // calculate purity double purity; purity = (double) total1 / (double) total2; System.out.println("WCSS --> Original Data: " + mykmeans.getSquaredError()); System.out.println("WCSS --> Imputed Data: " + globalkmeans.getSquaredError()); // System.out.println("Total Hit is \t" + total1); //System.out.println("Total for hit is \t" + total2); System.out.println("Purity is: " + purity); }
From source file: analysis.SilhouetteIndex.java
public double calculateIndex(SimpleKMeans sk, Instances inst, int c) throws Exception { //Map<Integer, Instances> clustermap = sk.clusterInstance; sk.setNumClusters(c);/* w ww .ja v a2s. c o m*/ sk.buildClusterer(inst); EuclideanDistance ed = new EuclideanDistance(); double avgSilhouetteOverAllPoints = 0.d; if (sk.getNumClusters() == 1) { //Index is not defined for k=1. needs at least 2 clusters return Double.NaN; } for (int i = 0; i < inst.numInstances(); i++) { //for the current element get its cluster int currentcluster = sk.clusterInstance(inst.instance(i)); //System.out.println(inst.instance(i).value(2)); double[] current_attr = new double[inst.numAttributes()]; double[] other_attr = new double[inst.numAttributes()]; //get attributes of the current instance for (int attr = 0; attr < inst.numAttributes(); attr++) { current_attr[attr] = inst.instance(i).value(attr); } // int counter double[] distances = new double[sk.getNumClusters()]; int[] counters = new int[sk.getNumClusters()]; //System.out.println("distances: "+distances.length); double avgInClusterDist = 0, dist = 0; int countsamecluster = 0; distances[currentcluster] = Double.MAX_VALUE; for (int j = 0; j < inst.numInstances(); j++) { for (int attr = 0; attr < inst.numAttributes(); attr++) { other_attr[attr] = inst.instance(j).value(attr); } //get cluster number of j th element int clusternumber = sk.clusterInstance(inst.instance(j)); //check if j and i in the same cluster if (clusternumber == currentcluster) { if (inst.instance(i) != inst.instance(j)) { //calculate average dist to other elements in the cluster //inst. 
dist = ed.compute(current_attr, other_attr); avgInClusterDist = avgInClusterDist + dist; countsamecluster++; } } else { dist = ed.compute(current_attr, other_attr); distances[clusternumber] = distances[clusternumber] + dist; counters[clusternumber]++; } } //calculate value ai if (countsamecluster > 0) { avgInClusterDist = avgInClusterDist / countsamecluster; //this is value ai } //find average distances to other clusters for (int k = 0; k < distances.length; k++) { if (k != currentcluster) { distances[k] = distances[k] / counters[k]; } } //Find the min value of average distance to other clusters double min = distances[0]; for (int k = 1; k < distances.length; k++) { if (min > distances[k]) { min = distances[k]; } } //si for current element: double si; // if we only have one element in our cluster it makes sense to set // si = 0 if (countsamecluster == 1) { si = 0.0d; } else { si = (min - avgInClusterDist) / Math.max(min, avgInClusterDist); } avgSilhouetteOverAllPoints = avgSilhouetteOverAllPoints + si; } //System.out.println(inst.numInstances()); return avgSilhouetteOverAllPoints / inst.numInstances(); }
From source file: detplagiasi.KMeansClustering.java
KMeansClustering() { addd = Container.getAddress(); try {/*w w w . java 2 s . c om*/ ClusterEvaluation eval; Instances data; String[] options; SimpleKMeans cl; File he = getArffFile(); data = new Instances(new BufferedReader(new FileReader(he))); System.out.println("-----KMeans Clustering-----"); // normal try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) { out.write("\r\n--> normal\r\n"); options = new String[2]; options[0] = "-t"; options[1] = he.getAbsolutePath(); out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), options) + "\r\n"); out.write("\r\n"); // manual call out.write("\n--> manual\r\n"); cl = new SimpleKMeans(); cl.setNumClusters(4); out.write("\r\n"); cl.buildClusterer(data); getDataUji(); System.out.println("jumlah kluster = " + cl.numberOfClusters()); System.out.println("kluster = " + cl.clusterInstance(dataUji.instance(0))); noClusterUji = cl.clusterInstance(dataUji.instance(0)); totalCluster = cl.numberOfClusters(); for (int b = 0; b < dataTraining.numInstances(); b++) { System.out.print("file " + td.fileName[b] + " termasuk cluster ke "); System.out.println(cl.clusterInstance(dataTraining.instance(b))); array1[b] = td.fileName[b]; array2[b] = cl.clusterInstance(dataTraining.instance(b)); //simpan nilai instance ke dalam sebuah array int buat dikirim ke detplaggui } out.write("\r\n"); eval = new ClusterEvaluation(); eval.setClusterer(cl); eval.evaluateClusterer(new Instances(data)); out.write("\r\n\n# of clusters: " + eval.getNumClusters()); } catch (Exception e) { System.err.println(e.getMessage()); System.out.println("error2 kmeans cluster"); } } catch (IOException ex) { Logger.getLogger(Clustering.class.getName()).log(Level.SEVERE, null, ex); System.out.println("errorrrr null kmeans"); } }
From source file: entities.ArffFile.java
/** * Dada una lista de parametros, se ejecuta el filtro de microagregacion. * Todos estos parametros son entrada del usuario. * @param df Puede ser Euclidian o Manhattan distance, se especifica en la entrada. * @param numCluster// w ww . jav a 2s . c o m * @param seed * @param maxIterations * @param replaceMissingValues * @param preserveInstancesOrder * @param attributes lista de los atributos que se desean generalizar con cluster */ public void microAgregacion(DistanceFunction df, int numCluster, int seed, int maxIterations, boolean replaceMissingValues, boolean preserveInstancesOrder, List<Integer> attributes) throws Exception { //instancesFilter = new Instances(instances); SimpleKMeans kMeans; kMeans = new SimpleKMeans(); Instances uniqueAttributes; uniqueAttributes = new Instances(instancesFilter); List<String> names = new ArrayList<>(); int i = 0; for (Integer attribute : attributes) { String name = new String(instancesFilter.attribute(attribute).name()); if (instancesFilter.attribute(attribute).isDate() || instancesFilter.attribute(attribute).isString()) throw new Exception("No se puede hacer cluster con atributos de tipo DATE o STRING"); names.add(name); } while (uniqueAttributes.numAttributes() != attributes.size()) { if (!names.contains(uniqueAttributes.attribute(i).name())) uniqueAttributes.deleteAttributeAt(i); else i++; } try { kMeans.setNumClusters(numCluster); kMeans.setMaxIterations(maxIterations); kMeans.setSeed(seed); kMeans.setDisplayStdDevs(false); kMeans.setDistanceFunction(df); kMeans.setDontReplaceMissingValues(replaceMissingValues); kMeans.setPreserveInstancesOrder(preserveInstancesOrder); kMeans.buildClusterer(uniqueAttributes); //System.out.println(kMeans); for (int j = 0; j < uniqueAttributes.numInstances(); j++) { int cluster = kMeans.clusterInstance(uniqueAttributes.instance(j)); for (int k = 0; k < uniqueAttributes.numAttributes(); k++) { if (uniqueAttributes.attribute(k).isNumeric()) uniqueAttributes.instance(j).setValue(k, 
Double.parseDouble(kMeans.getClusterCentroids().instance(cluster).toString(k))); else uniqueAttributes.instance(j).setValue(k, kMeans.getClusterCentroids().instance(cluster).toString(k)); } } replaceValues(uniqueAttributes, attributes); } catch (Exception ex) { Logger.getLogger(ArffFile.class.getName()).log(Level.SEVERE, null, ex); } //saveToFile("4"); }
From source file: net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.simplekmeans.SimpleKMeansClusterer.java
License: Open Source License
@Override public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) { List<Integer> clusters = new ArrayList<Integer>(); String[] options = new String[2]; SimpleKMeans clusterer = new SimpleKMeans(); int numberOfGroups = parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue(); options[0] = "-N"; options[1] = String.valueOf(numberOfGroups); try {/*from w w w . ja va2 s. com*/ clusterer.setOptions(options); clusterer.buildClusterer(dataset); Enumeration<?> e = dataset.enumerateInstances(); while (e.hasMoreElements()) { clusters.add(clusterer.clusterInstance((Instance) e.nextElement())); } ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(), parameters.getParameter(EMClustererParameters.visualization).getValue()); return result; } catch (Exception ex) { logger.log(Level.SEVERE, null, ex); return null; } }
From source file: org.knime.knip.suise.node.boundarymodel.contourdata.ContourDataFromClusterSelection.java
License: Open Source License
/**
 * {@inheritDoc}
 *
 * Clusters the contour feature vectors with k-means, renders one binary
 * membership image per cluster, greedily drops clusters while that improves
 * a score, and finally derives per-row translations from the mean x-position
 * of the surviving pixels.
 */
@Override
protected void extractContourData(int[] translations, int[] permutation) {
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setNumClusters(m_numClusters);
        // Build a weka data set: one numeric attribute per contour feature,
        // one instance per contour vector.
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int a = 0; a < contourDataGrid().numFeatures(); a++) {
            attInfo.add(new Attribute("att" + a));
        }
        Instances data = new Instances("dataset", attInfo, contourDataGrid().numVectors());
        for (double[] vec : contourDataGrid()) {
            data.add(new DenseInstance(1.0, vec));
        }
        clusterer.buildClusterer(data);
        // Create the clustered images p(C|x): pixel set to 1 where the
        // corresponding vector was assigned to that image's cluster.
        Img[] imgs = new Img[m_numClusters];
        int[] dims = new int[] { contourDataGrid().width(), contourDataGrid().totalLength() };
        Cursor<FloatType>[] cursors = new Cursor[m_numClusters];
        for (int i = 0; i < imgs.length; i++) {
            imgs[i] = new ArrayImgFactory<FloatType>().create(dims, new FloatType());
            cursors[i] = imgs[i].localizingCursor();
        }
        int cluster;
        // All cursors advance in lock-step so every vector writes to the same
        // pixel position in its cluster's image.
        for (Instance instance : data) {
            for (int i = 0; i < cursors.length; i++) {
                cursors[i].fwd();
            }
            cluster = clusterer.clusterInstance(instance);
            cursors[cluster].get().set(1.0f);
        }
        // Greedily select the best cluster combination: start with all
        // clusters together (res == all ones) and repeatedly remove the one
        // whose removal maximises the score of the remaining clusters.
        Img<FloatType> res = imgs[0].factory().create(imgs[0], new FloatType());
        Cursor<FloatType> resC = res.cursor();
        while (resC.hasNext()) {
            resC.fwd();
            resC.get().set(1.0f);
        }
        Img<FloatType> tmp = res.factory().create(res, new FloatType());
        // TODO: normalize img
        double score = 0;
        double bestScore = -Double.MAX_VALUE;
        double globalBestScore = -Double.MAX_VALUE;
        int bestCluster = 0;
        for (int i = 0; i < m_numClusters; i++) {
            for (int j = 0; j < m_numClusters; j++) {
                if (imgs[j] != null) { // null marks an already-removed cluster
                    substract(res, imgs[j], tmp);
                    score = calcScore(tmp, m_bias);
                    if (score > bestScore) {
                        bestScore = score;
                        bestCluster = j;
                    }
                }
            }
            // Permanently remove the best-to-drop cluster from the result.
            substract(res, imgs[bestCluster], res);
            imgs[bestCluster] = null;
            // Stop once dropping another cluster no longer improves on the
            // best combination seen so far.
            if (bestScore < globalBestScore) {
                break;
            }
            globalBestScore = bestScore;
            bestScore = -Double.MAX_VALUE;
        }
        // Calculate the translations (mean x-positions): for each image row,
        // the pixel-weighted mean position relative to CENTER_COL; rows with
        // no surviving pixel get translation 0.
        resC = res.localizingCursor();
        double meanPos = 0;
        double num = 0;
        int index = 0;
        while (resC.hasNext()) {
            resC.fwd();
            meanPos += resC.get().get() * resC.getDoublePosition(0);
            num += resC.get().get();
            index++;
            if ((index % res.dimension(0)) == 0) { // reached the end of a row
                if (num > 0) {
                    translations[(int) ((index - 1) / res.dimension(0))] = (int) Math.round(meanPos / num)
                            - CENTER_COL;
                } else {
                    // setWeight((int)((index - 1) / res.dimension(0)), 0);
                    translations[(int) ((index - 1) / res.dimension(0))] = 0;
                }
                meanPos = 0;
                num = 0;
            }
        }
    } catch (Exception e) {
        // NOTE(review): exception silently swallowed (auto-generated stub) —
        // consider logging or rethrowing; confirm callers tolerate failure.
    }
}