List of usage examples for weka.clusterers SimpleKMeans setNumClusters
@Override public void setNumClusters(int n) throws Exception
From source file:lineage.AAFClusterer.java
License:Open Source License
/** * K-Means Clustering//from w w w . ja v a 2s . c om * @param data - matrix of observations (numObs x numFeatures) * @param k - number of clusters */ public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) { Instances ds = convertMatrixToWeka(data, numObs, numFeatures); // uses Euclidean distance by default SimpleKMeans clusterer = new SimpleKMeans(); try { clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(k); clusterer.buildClusterer(ds); // cluster centers Instances centers = clusterer.getClusterCentroids(); Cluster[] clusters = new Cluster[centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) { Instance inst = centers.instance(i); double[] mean = new double[inst.numAttributes()]; for (int j = 0; j < mean.length; j++) { mean[j] = inst.value(j); } clusters[i] = new Cluster(mean, i); } // cluster members int[] assignments = clusterer.getAssignments(); for (int i = 0; i < assignments.length; i++) { clusters[assignments[i]].addMember(i); } return clusters; } catch (Exception e) { e.printStackTrace(); System.exit(-1); return null; } }
From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java
License:Open Source License
/** * /*from www .j a v a 2 s . c o m*/ * @return * @throws Exception */ public double[] getClusteredInstances() throws Exception { //Removing potential class index instances.setClassIndex(-1); //Clustering using Kmeans int k; double max = 0, r2 = 0, pseudoF = 0; //Testing from 2 to 10 clusters, should be set as entry of this function SimpleKMeans bestKMeans = new SimpleKMeans(); for (k = 2; k <= maxClusters; k++) { final SimpleKMeans kMeans = new SimpleKMeans(); kMeans.setNumClusters(k); kMeans.buildClusterer(instances); //Choosing the "optimal" number of clusters r2 = R2(kMeans); pseudoF = pseudoF(r2, k); //System.out.println(pseudo_f); if (pseudoF > max) { max = pseudoF; bestKMeans = kMeans; } } //Real clustering using the chosen number final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(bestKMeans); eval.evaluateClusterer(instances); double[] clusterAssignments = eval.getClusterAssignments(); this.usedKmeans = bestKMeans; return clusterAssignments; }
From source file:milk.classifiers.MIRBFNetwork.java
License:Open Source License
/**
 * Transforms multi-instance exemplars into a clustered feature space: all
 * instances are pooled (weighted so each bag contributes equal total weight),
 * a density-based k-means clusterer is fitted via the {@code m_clm} filter,
 * and each exemplar is then filtered into that space with its bag id
 * re-attached as attribute 0.
 *
 * @param ex the exemplars to transform
 * @return the transformed exemplars
 * @throws Exception if clustering or filtering fails
 */
public Exemplars transform(Exemplars ex) throws Exception {
    // Throw all the instances together
    Instances data = new Instances(ex.exemplar(0).getInstances());
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        // weight each instance so that every bag has total weight 1.0,
        // regardless of how many instances it contains
        double weight = 1.0 / (double) curr.getInstances().numInstances();
        for (int j = 0; j < curr.getInstances().numInstances(); j++) {
            Instance inst = (Instance) curr.getInstances().instance(j).copy();
            inst.setWeight(weight);
            data.add(inst);
        }
    }

    // rescale so the sum of weights equals the number of instances
    double factor = (double) data.numInstances() / (double) data.sumOfWeights();
    for (int i = 0; i < data.numInstances(); i++) {
        data.instance(i).setWeight(data.instance(i).weight() * factor);
    }

    // wrap k-means in a density-based clusterer and plug it into the filter;
    // the bag-id attribute is excluded from clustering (ignored indices are
    // 1-based, hence idIndex() + 1)
    SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(m_num_clusters);
    MakeDensityBasedClusterer clust = new MakeDensityBasedClusterer();
    clust.setClusterer(kMeans);
    m_clm.setDensityBasedClusterer(clust);
    m_clm.setIgnoredAttributeIndices("" + (ex.exemplar(0).idIndex() + 1));
    m_clm.setInputFormat(data);

    // Use filter and discard result — this fixes m_clm's output format; an
    // empty copy of that format is then extended with the bag-id attribute
    Instances tempData = Filter.useFilter(data, m_clm);
    tempData = new Instances(tempData, 0);
    tempData.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);

    // Go through exemplars and add them to new dataset
    Exemplars newExs = new Exemplars(tempData);
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        Instances temp = Filter.useFilter(curr.getInstances(), m_clm);
        temp.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);
        for (int j = 0; j < temp.numInstances(); j++) {
            temp.instance(j).setValue(0, curr.idValue());
        }
        newExs.add(new Exemplar(temp));
    }
    //System.err.println("Finished transforming");
    //System.err.println(newExs);
    return newExs;
}
From source file:myclusterer.WekaCode.java
/**
 * Builds a clusterer of the requested type over the given data set, asking
 * the user on stdin for the number of clusters.
 *
 * Note: {@code SimpleKMeans}, {@code HierarchicalClusterer}, {@code MyKMeans}
 * and {@code MyAgnes} in the comparisons below are int constants of this
 * class selecting the algorithm (in expression position they shadow the
 * identically named classes).
 *
 * @param dataSet instances to cluster
 * @param clusterType one of this class's cluster-type constants
 * @return the built clusterer, or null if {@code clusterType} is unknown
 * @throws Exception if WEKA fails to build the clusterer
 */
public static Clusterer buildClusterer(Instances dataSet, int clusterType) throws Exception {
    Clusterer clusterer = null;
    if (clusterType == SimpleKMeans || clusterType == HierarchicalClusterer
            || clusterType == MyKMeans || clusterType == MyAgnes) {
        // Fixed: the prompt / read / buildClusterer sequence was copy-pasted
        // into all four branches; ask once and build once instead.
        // (Scanner over System.in is intentionally left unclosed — closing it
        // would close System.in for the rest of the program.)
        Scanner scan = new Scanner(System.in);
        System.out.print("Masukkan jumlah cluster: ");
        int K = scan.nextInt();
        if (clusterType == SimpleKMeans) {
            SimpleKMeans kmeans = new SimpleKMeans();
            kmeans.setNumClusters(K);
            clusterer = kmeans;
        } else if (clusterType == HierarchicalClusterer) {
            HierarchicalClusterer hierarchical = new HierarchicalClusterer();
            hierarchical.setNumClusters(K);
            clusterer = hierarchical;
        } else if (clusterType == MyKMeans) {
            MyKMeans kmeans = new MyKMeans();
            kmeans.setNumClusters(K);
            clusterer = kmeans;
        } else { // MyAgnes
            MyAgnes agnes = new MyAgnes();
            agnes.setNumClusters(K);
            clusterer = agnes;
        }
        clusterer.buildClusterer(dataSet);
    }
    return clusterer;
}
From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.KMeansClusteringStrategy.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>
 * This method is specialized for <b>kmeans</b> clustering: behavior models
 * are converted to WEKA instances, clustered with SimpleKMeans (Euclidean
 * distance), and each resulting centroid becomes one Behavior Mix entry
 * whose relative frequency is the fraction of instances in that cluster.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {
        // Returns a valid instances set, generated based on the absolut
        // behavior models
        Instances instances = getInstances(behaviorModelsAbsolute);

        // KMeans --> Weka
        SimpleKMeans kmeans = new SimpleKMeans();

        // Alternative Manhattan-distance setup, kept for reference:
        // DistanceFunction manhattanDistance = new ManhattanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // manhattanDistance.setOptions(options);
        // manhattanDistance.setInstances(instances);
        // kmeans.setDistanceFunction(manhattanDistance);

        // distance function with option don*t normalize
        DistanceFunction euclideanDistance = new EuclideanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // euclideanDistance.setOptions(options);
        euclideanDistance.setInstances(instances);
        kmeans.setDistanceFunction(euclideanDistance);
        // needed so getAssignments() below reflects the input order
        kmeans.setPreserveInstancesOrder(true);

        int[] clustersize = null;
        int[] assignments = null;

        // get number of clusters to be generated.
        int numberOfClusters = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());

        // clustering
        // NOTE(review): this loop executes exactly once (bounds are both
        // numberOfClusters) — presumably the remnant of a cluster-size sweep;
        // confirm before widening the bounds.
        for (int clusterSize = numberOfClusters; clusterSize <= numberOfClusters; clusterSize++) {
            // must be specified in a fix way
            kmeans.setNumClusters(clusterSize);

            // build cluster
            kmeans.buildClusterer(instances);

            clustersize = kmeans.getClusterSizes();
            assignments = kmeans.getAssignments();

            // compute and print clustering quality metrics (side effects:
            // console output only)
            ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
            clusteringMetrics.calculateInterClusteringSimilarity(kmeans.getClusterCentroids());
            clusteringMetrics.calculateIntraClusteringSimilarity(kmeans.getClusterCentroids(), instances,
                    assignments);
            clusteringMetrics.calculateBetas();

            clusteringMetrics.printErrorMetricsHeader();
            clusteringMetrics.printErrorMetrics(kmeans.getClusterCentroids().numInstances());
            clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);
            // clusteringMetrics.printClusterAssignmentsToSession(assignments,
            // clusterSize);
        }

        Instances resultingCentroids = kmeans.getClusterCentroids();

        // for each centroid instance, create new behaviorModelRelative
        for (int i = 0; i < resultingCentroids.numInstances(); i++) {
            Instance centroid = resultingCentroids.instance(i);

            // create a Behavior Model, which includes all vertices only;
            // the vertices are associated with the use cases, and a
            // dedicated
            // vertex that represents the final state will be added;
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid, assignments,
                    i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative Frequency of cluster i
            double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances();

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency;
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);
        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // if any error occurs, an ExtractionExeption should be thrown,
    // indicating the error that occurred;
    // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea for handling the Behavior Models and how to
    // use the helping methods of the (abstract) parent class.
    return behaviorMix;
}
From source file:nl.uva.sne.classifiers.Kmeans.java
@Override public Map<String, String> cluster(String inDir) throws IOException, ParseException { try {// www.j av a2 s .c om Instances data = ClusterUtils.terms2Instances(inDir, false); DistanceFunction df; // SimpleKMeans currently only supports the Euclidean and Manhattan distances. switch (distanceFunction) { case "Euclidean": df = new EuclideanDistance(data); break; case "Manhattan": df = new ManhattanDistance(data); break; default: df = new EuclideanDistance(data); break; } SimpleKMeans clusterer = new SimpleKMeans(); Random rand = new Random(System.currentTimeMillis()); int seed = rand.nextInt((Integer.MAX_VALUE - 1000000) + 1) + 1000000; clusterer.setSeed(seed); clusterer.setMaxIterations(1000000000); Logger.getLogger(Kmeans.class.getName()).log(Level.INFO, "Start clusteing"); clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(numOfClusters); clusterer.setDistanceFunction(df); return ClusterUtils.bulidClusters(clusterer, data, inDir); } catch (Exception ex) { Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex); } return null; }
From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java
License:Open Source License
protected void unmodifiedContourData() throws Exception { m_contourData = new ContourDataExtractor() { protected void extractContourData(int[] translations, int[] permutation) { Arrays.fill(translations, 0); }//from ww w .ja v a 2 s .c om }; SimpleKMeans clusterer = new SimpleKMeans(); int clustersPerSample = 2; clusterer.setNumClusters(clustersPerSample * m_contourDataGrid.numSamples()); m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer); m_classifier.buildClassifier(m_contourDataGrid, m_bgData); m_contourModels = new double[1][m_contourDataGrid.numClusters() + 1]; m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()]; for (int i = 0; i < m_contourData.numVectors(); i++) { if (m_contourData.weight(i) > 0) m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0; } removeRedundantContourModels(); }
From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java
License:Open Source License
protected void iterativeInferenceApproach() throws Exception { double stdev = 200; /* Conditional random field approach */ String val; if ((val = m_parameters.get(OPTIONAL_PARAMETER_STDEV)) != null) { stdev = Double.valueOf(val); }/* ww w .jav a 2 s .c om*/ m_contourData = new ContourDataMisc(stdev); // m_contourData = new ContourDataFromCRF(); // m_contourData = new ContourDataFromCRFNaive(); SimpleKMeans clusterer = new SimpleKMeans(); double clustersPerSample = 1; clusterer.setNumClusters(Math.max(1, (int) Math.round(clustersPerSample * m_contourDataGrid.numSamples()))); m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer); m_classifier.buildClassifier(m_contourDataGrid, m_bgData); m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()]; for (int i = 0; i < m_contourData.numVectors(); i++) { if (m_contourData.weight(i) > 0) m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0; } removeRedundantContourModels(); }
From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java
License:Open Source License
/**
 * Interval rule induction (IRI) approach: configures an IRI multi-instance
 * classifier, uses it to extract the contour data, and feeds that data into
 * the k-means-based WEKA contour-data classifier; finally derives one binary
 * contour-model vector per sample.
 *
 * NOTE(review): the bias read from m_parameters (OPTIONAL_PARAMETER_BIAS,
 * default 100) is set on miClass but then unconditionally overwritten by the
 * mean-sample-length-based bias a few lines below — confirm which of the two
 * is intended; as written the parameter has no effect.
 *
 * @throws Exception if building the classifier fails
 */
protected void intervalRuleInductionApproach() throws Exception {
    /* Interval rule induction */
    // train the iri (interval rule induction) classifier
    double bias = 100;
    String val;
    if ((val = m_parameters.get(OPTIONAL_PARAMETER_BIAS)) != null) {
        bias = Double.valueOf(val);
    }
    final IRI miClass = new IRI();
    miClass.setBias(bias);
    // set the bias according to the mean sample length
    double meanSampleLength = 0;
    for (int i = 0; i < m_contourDataGrid.numSamples(); i++) {
        meanSampleLength += m_contourDataGrid.getSampleLength(i);
    }
    meanSampleLength /= m_contourDataGrid.numSamples();
    miClass.setBias((int) Math.round(meanSampleLength / 2));

    // extract the actual contour data to create the contour models
    m_contourData = new ContourDataFromIRI(miClass);
    // m_contourData.extractContourData(m_contourDataGrid);
    // new WekaMIContourDataClassifier(miClass).buildClassifier(m_contourDataGrid, m_bgData);

    /*
     * use the extracted contour data to feed a weka classifier
     */
    SimpleKMeans clusterer = new SimpleKMeans();
    double clustersPerSample = 1;
    clusterer.setNumClusters(Math.max(1, (int) Math.round(clustersPerSample * m_contourDataGrid.numSamples())));
    m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer);
    m_classifier.buildClassifier(m_contourDataGrid, m_bgData);
    // mark, per sample, which clusters received a positively weighted vector
    m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()];
    for (int i = 0; i < m_contourData.numVectors(); i++) {
        if (m_contourData.weight(i) > 0)
            m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0;
    }
    removeRedundantContourModels();

    /*
     * use this, if the retrieved interval rules should be used directly for
     * classification
     */
    // // retrieve a rule distribution for each sample, summarize
    // // distributions to cell models
    // ContourDataExtractor cd = m_contourData;
    // int numSamples = cd.numSamples();
    // final int numClusters = cd.numClusters();
    // int numVectors = cd.numVectors();
    // m_contourModels = new double[numSamples][numClusters];
    // for (int i = 0; i < numVectors; i++) {
    // m_contourModels[cd.getSampleIndex(i)][cd.getClusterIdx(i)]++;
    // }
    //
    // for (int i = 0; i < numSamples; i++) {
    // System.out.println(Arrays.toString(m_contourModels[i]));
    // Utils.normalize(m_contourModels[i],
    // m_contourModels[i][Utils.maxIndex(m_contourModels[i])]);
    // }
    //
    // // create a new classifier for each contour model
    // m_classifier = new ContourDataClassifier() {
    // public double contourProbability(double[] inst) throws Exception {
    // return 0;
    // }
    //
    // @Override
    // public void buildClassifier(ContourDataGrid cData,
    // VectorDataList bgData) throws Exception {
    //
    // }
    //
    // public double[] contourProbDistribution(double[] inst)
    // throws Exception {
    // Instance i = new DenseInstance(1.0, inst);
    // double[] distr = new double[numClusters - 1];
    // for (int j = 0; j < distr.length; j++) {
    // distr[j] = miClass.getRule(j).distributionForInstance(i)[1];
    // }
    // return distr;
    // }
    // };
}
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.ContourDataFromClusterSelection.java
License:Open Source License
/** * {@inheritDoc}// w w w . j a v a 2 s . com */ @Override protected void extractContourData(int[] translations, int[] permutation) { SimpleKMeans clusterer = new SimpleKMeans(); try { clusterer.setNumClusters(m_numClusters); // cluster the data ArrayList<Attribute> attInfo = new ArrayList<Attribute>(); for (int a = 0; a < contourDataGrid().numFeatures(); a++) { attInfo.add(new Attribute("att" + a)); } Instances data = new Instances("dataset", attInfo, contourDataGrid().numVectors()); for (double[] vec : contourDataGrid()) { data.add(new DenseInstance(1.0, vec)); } clusterer.buildClusterer(data); // create clustered images p(C|x) Img[] imgs = new Img[m_numClusters]; int[] dims = new int[] { contourDataGrid().width(), contourDataGrid().totalLength() }; Cursor<FloatType>[] cursors = new Cursor[m_numClusters]; for (int i = 0; i < imgs.length; i++) { imgs[i] = new ArrayImgFactory<FloatType>().create(dims, new FloatType()); cursors[i] = imgs[i].localizingCursor(); } int cluster; for (Instance instance : data) { for (int i = 0; i < cursors.length; i++) { cursors[i].fwd(); } cluster = clusterer.clusterInstance(instance); cursors[cluster].get().set(1.0f); } // greedily select the best cluster combination starting with all // clusters together and then removing the one whose removal // maximises the score of the remaining clusters Img<FloatType> res = imgs[0].factory().create(imgs[0], new FloatType()); Cursor<FloatType> resC = res.cursor(); while (resC.hasNext()) { resC.fwd(); resC.get().set(1.0f); } Img<FloatType> tmp = res.factory().create(res, new FloatType()); // TODO: normalize img // NormalizeIterableInterval<FloatType, Img<FloatType>> imgNorm = // new NormalizeIterableInterval<FloatType, Img<FloatType>>(); double score = 0; double bestScore = -Double.MAX_VALUE; double globalBestScore = -Double.MAX_VALUE; int bestCluster = 0; // ShowInSameFrame showInFrame = new ShowInSameFrame(); for (int i = 0; i < m_numClusters; i++) { for (int j = 0; j < m_numClusters; j++) { if 
(imgs[j] != null) { substract(res, imgs[j], tmp); score = calcScore(tmp, m_bias); if (score > bestScore) { bestScore = score; bestCluster = j; } } } substract(res, imgs[bestCluster], res); imgs[bestCluster] = null; // Pair<FloatType, FloatType> minmax = // Operations.compute(new MinMax<FloatType>(), tmp); // Operations.<FloatType, FloatType> map( // new Normalize<FloatType>(minmax.getA().getRealDouble(), // minmax.getB().getRealDouble(), // -Float.MAX_VALUE, Float.MAX_VALUE)).compute( // tmp, tmp); // showInFrame.show(tmp, 2.0); if (bestScore < globalBestScore) { break; } globalBestScore = bestScore; bestScore = -Double.MAX_VALUE; } // calculate the translations (mean positions) resC = res.localizingCursor(); double meanPos = 0; double num = 0; int index = 0; while (resC.hasNext()) { resC.fwd(); meanPos += resC.get().get() * resC.getDoublePosition(0); num += resC.get().get(); index++; if ((index % res.dimension(0)) == 0) { if (num > 0) { translations[(int) ((index - 1) / res.dimension(0))] = (int) Math.round(meanPos / num) - CENTER_COL; } else { // setWeight((int)((index - 1) / res.dimension(0)), 0); translations[(int) ((index - 1) / res.dimension(0))] = 0; } meanPos = 0; num = 0; } } } catch (Exception e) { // TODO Auto-generated catch block } }