List of usage examples for weka.clusterers.SimpleKMeans
public SimpleKMeans()
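Before the project-specific examples below, here is a minimal, self-contained sketch of the typical workflow (configure, build, then query per-instance assignments). It is an illustrative example, not taken from any of the listed projects, and assumes Weka 3.7+ where DenseInstance is available:

import java.util.ArrayList;

import weka.clusterers.SimpleKMeans;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class SimpleKMeansExample {
    public static void main(String[] args) throws Exception {
        // build a tiny two-attribute dataset in memory
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x"));
        attrs.add(new Attribute("y"));
        Instances data = new Instances("demo", attrs, 4);
        data.add(new DenseInstance(1.0, new double[] { 1.0, 1.0 }));
        data.add(new DenseInstance(1.0, new double[] { 1.5, 2.0 }));
        data.add(new DenseInstance(1.0, new double[] { 8.0, 8.0 }));
        data.add(new DenseInstance(1.0, new double[] { 9.0, 8.5 }));

        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setNumClusters(2);
        kmeans.setSeed(42);                     // must be set before buildClusterer()
        kmeans.setPreserveInstancesOrder(true); // required for getAssignments()
        kmeans.buildClusterer(data);

        // per-instance cluster assignments, in input order
        int[] assignments = kmeans.getAssignments();
        for (int i = 0; i < assignments.length; i++) {
            System.out.println("instance " + i + " -> cluster " + assignments[i]);
        }
    }
}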
From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java
License:Open Source License
protected void iterativeInferenceApproach() throws Exception {
    /* Conditional random field approach */
    double stdev = 200;
    String val;
    if ((val = m_parameters.get(OPTIONAL_PARAMETER_STDEV)) != null) {
        stdev = Double.valueOf(val);
    }
    m_contourData = new ContourDataMisc(stdev);
    // m_contourData = new ContourDataFromCRF();
    // m_contourData = new ContourDataFromCRFNaive();

    SimpleKMeans clusterer = new SimpleKMeans();
    double clustersPerSample = 1;
    clusterer.setNumClusters(
            Math.max(1, (int) Math.round(clustersPerSample * m_contourDataGrid.numSamples())));

    m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer);
    m_classifier.buildClassifier(m_contourDataGrid, m_bgData);

    m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()];
    for (int i = 0; i < m_contourData.numVectors(); i++) {
        if (m_contourData.weight(i) > 0)
            m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0;
    }
    removeRedundantContourModels();
}
From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java
License:Open Source License
protected void intervalRuleInductionApproach() throws Exception {
    /* Interval rule induction */
    // train the IRI (interval rule induction) classifier
    double bias = 100;
    String val;
    if ((val = m_parameters.get(OPTIONAL_PARAMETER_BIAS)) != null) {
        bias = Double.valueOf(val);
    }
    final IRI miClass = new IRI();
    miClass.setBias(bias);

    // set the bias according to the mean sample length
    double meanSampleLength = 0;
    for (int i = 0; i < m_contourDataGrid.numSamples(); i++) {
        meanSampleLength += m_contourDataGrid.getSampleLength(i);
    }
    meanSampleLength /= m_contourDataGrid.numSamples();
    miClass.setBias((int) Math.round(meanSampleLength / 2));

    // extract the actual contour data to create the contour models
    m_contourData = new ContourDataFromIRI(miClass);
    // m_contourData.extractContourData(m_contourDataGrid);
    // new WekaMIContourDataClassifier(miClass).buildClassifier(m_contourDataGrid, m_bgData);

    /*
     * use the extracted contour data to feed a weka classifier
     */
    SimpleKMeans clusterer = new SimpleKMeans();
    double clustersPerSample = 1;
    clusterer.setNumClusters(
            Math.max(1, (int) Math.round(clustersPerSample * m_contourDataGrid.numSamples())));
    m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer);
    m_classifier.buildClassifier(m_contourDataGrid, m_bgData);
    m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()];
    for (int i = 0; i < m_contourData.numVectors(); i++) {
        if (m_contourData.weight(i) > 0)
            m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0;
    }
    removeRedundantContourModels();

    /*
     * use this, if the retrieved interval rules should be used directly for
     * classification
     */
    // // retrieve a rule distribution for each sample, summarize
    // // distributions to cell models
    // ContourDataExtractor cd = m_contourData;
    // int numSamples = cd.numSamples();
    // final int numClusters = cd.numClusters();
    // int numVectors = cd.numVectors();
    // m_contourModels = new double[numSamples][numClusters];
    // for (int i = 0; i < numVectors; i++) {
    //     m_contourModels[cd.getSampleIndex(i)][cd.getClusterIdx(i)]++;
    // }
    //
    // for (int i = 0; i < numSamples; i++) {
    //     System.out.println(Arrays.toString(m_contourModels[i]));
    //     Utils.normalize(m_contourModels[i],
    //             m_contourModels[i][Utils.maxIndex(m_contourModels[i])]);
    // }
    //
    // // create a new classifier for each contour model
    // m_classifier = new ContourDataClassifier() {
    //     public double contourProbability(double[] inst) throws Exception {
    //         return 0;
    //     }
    //
    //     @Override
    //     public void buildClassifier(ContourDataGrid cData,
    //             VectorDataList bgData) throws Exception {
    //
    //     }
    //
    //     public double[] contourProbDistribution(double[] inst)
    //             throws Exception {
    //         Instance i = new DenseInstance(1.0, inst);
    //         double[] distr = new double[numClusters - 1];
    //         for (int j = 0; j < distr.length; j++) {
    //             distr[j] = miClass.getRule(j).distributionForInstance(i)[1];
    //         }
    //         return distr;
    //     }
    // };
}
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.ContourDataFromClusterSelection.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
protected void extractContourData(int[] translations, int[] permutation) {
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setNumClusters(m_numClusters);

        // cluster the data
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int a = 0; a < contourDataGrid().numFeatures(); a++) {
            attInfo.add(new Attribute("att" + a));
        }
        Instances data = new Instances("dataset", attInfo, contourDataGrid().numVectors());
        for (double[] vec : contourDataGrid()) {
            data.add(new DenseInstance(1.0, vec));
        }
        clusterer.buildClusterer(data);

        // create clustered images p(C|x)
        Img[] imgs = new Img[m_numClusters];
        int[] dims = new int[] { contourDataGrid().width(), contourDataGrid().totalLength() };
        Cursor<FloatType>[] cursors = new Cursor[m_numClusters];
        for (int i = 0; i < imgs.length; i++) {
            imgs[i] = new ArrayImgFactory<FloatType>().create(dims, new FloatType());
            cursors[i] = imgs[i].localizingCursor();
        }
        int cluster;
        for (Instance instance : data) {
            for (int i = 0; i < cursors.length; i++) {
                cursors[i].fwd();
            }
            cluster = clusterer.clusterInstance(instance);
            cursors[cluster].get().set(1.0f);
        }

        // greedily select the best cluster combination starting with all
        // clusters together and then removing the one whose removal
        // maximises the score of the remaining clusters
        Img<FloatType> res = imgs[0].factory().create(imgs[0], new FloatType());
        Cursor<FloatType> resC = res.cursor();
        while (resC.hasNext()) {
            resC.fwd();
            resC.get().set(1.0f);
        }
        Img<FloatType> tmp = res.factory().create(res, new FloatType());

        // TODO: normalize img
        // NormalizeIterableInterval<FloatType, Img<FloatType>> imgNorm =
        //         new NormalizeIterableInterval<FloatType, Img<FloatType>>();
        double score = 0;
        double bestScore = -Double.MAX_VALUE;
        double globalBestScore = -Double.MAX_VALUE;
        int bestCluster = 0;
        // ShowInSameFrame showInFrame = new ShowInSameFrame();
        for (int i = 0; i < m_numClusters; i++) {
            for (int j = 0; j < m_numClusters; j++) {
                if (imgs[j] != null) {
                    substract(res, imgs[j], tmp);
                    score = calcScore(tmp, m_bias);
                    if (score > bestScore) {
                        bestScore = score;
                        bestCluster = j;
                    }
                }
            }
            substract(res, imgs[bestCluster], res);
            imgs[bestCluster] = null;
            // Pair<FloatType, FloatType> minmax =
            //         Operations.compute(new MinMax<FloatType>(), tmp);
            // Operations.<FloatType, FloatType> map(
            //         new Normalize<FloatType>(minmax.getA().getRealDouble(),
            //                 minmax.getB().getRealDouble(),
            //                 -Float.MAX_VALUE, Float.MAX_VALUE)).compute(tmp, tmp);
            // showInFrame.show(tmp, 2.0);
            if (bestScore < globalBestScore) {
                break;
            }
            globalBestScore = bestScore;
            bestScore = -Double.MAX_VALUE;
        }

        // calculate the translations (mean positions)
        resC = res.localizingCursor();
        double meanPos = 0;
        double num = 0;
        int index = 0;
        while (resC.hasNext()) {
            resC.fwd();
            meanPos += resC.get().get() * resC.getDoublePosition(0);
            num += resC.get().get();
            index++;
            if ((index % res.dimension(0)) == 0) {
                if (num > 0) {
                    translations[(int) ((index - 1) / res.dimension(0))] =
                            (int) Math.round(meanPos / num) - CENTER_COL;
                } else {
                    // setWeight((int)((index - 1) / res.dimension(0)), 0);
                    translations[(int) ((index - 1) / res.dimension(0))] = 0;
                }
                meanPos = 0;
                num = 0;
            }
        }
    } catch (Exception e) {
        // don't swallow clustering failures silently
        e.printStackTrace();
    }
}
From source file:org.montp2.m1decol.ter.clustering.KMeansClustering.java
License:Open Source License
public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster)
        throws Exception {
    Instances inputInstances = WekaUtils.loadARFF(inPath);

    EuclideanDistance euclideanDistance = new EuclideanDistance();
    euclideanDistance.setAttributeIndices("first-last");
    euclideanDistance.setDontNormalize(false);
    euclideanDistance.setInvertSelection(false);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setPreserveInstancesOrder(
            Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.PERSERVE_INSTANCE)));
    kmeans.setDontReplaceMissingValues(Boolean
            .valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DONT_REPLACE_MISSING_VALUES)));
    kmeans.setDisplayStdDevs(
            Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DISPLAY_STD_DEVS)));
    kmeans.setMaxIterations(
            Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.MAX_ITERATIONS)));
    kmeans.setNumClusters(
            Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.NUM_CLUSTERS)));
    kmeans.setSeed(10);
    // kmeans.setSeed(
    //         Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.SEED)));
    kmeans.setDistanceFunction(euclideanDistance);
    kmeans.buildClusterer(inputInstances);

    WekaUtils.saveModel(kmeans, outPath);

    /*
     * To print the per-cluster percentages:
     * ClusterEvaluation eval = new ClusterEvaluation();
     * eval.setClusterer(kmeans);
     * eval.evaluateClusterer(inputInstances);
     * System.out.println(eval.clusterResultsToString());
     */
    return kmeans;
}
From source file:probcog.bayesnets.learning.DomainLearner.java
License:Open Source License
/**
 * Performs the clustering (if some domains are to be learnt by clustering)
 * and applies all the new domains. (This method is called by finish(),
 * which should be called when all the examples have been passed.)
 */
protected void end_learning() throws Exception {
    if (directDomains != null)
        for (int i = 0; i < directDomains.length; i++) {
            if (verbose)
                System.out.println(directDomains[i]);
            HashSet<String> hs = directDomainData.get(i);
            Discrete domain = new Discrete();
            for (Iterator<String> iter = hs.iterator(); iter.hasNext();)
                domain.addName(iter.next());
            BeliefNode node = directDomains[i];
            if (node == null) {
                System.out.println("No node with name '" + directDomains[i]
                        + "' found to learn direct domain for.");
                continue; // skip missing nodes instead of dereferencing null below
            }
            // System.out.println("DomainLearner: applying domain " + hs + " to " + node.getName());
            bn.bn.changeBeliefNodeDomain(node, domain);
        }
    if (clusteredDomains != null)
        for (int i = 0; i < clusteredDomains.length; i++) {
            if (verbose)
                System.out.println(clusteredDomains[i].nodeName);
            try {
                // perform clustering
                clusterers[i] = new SimpleKMeans();
                if (clusteredDomains[i].numClusters != 0)
                    clusterers[i].setNumClusters(clusteredDomains[i].numClusters);
                clusterers[i].buildClusterer(clusterData[i]);
                // update domain
                bn.bn.changeBeliefNodeDomain(bn.getNode(clusteredDomains[i].nodeName),
                        new Discretized(clusterers[i], clusterNamer));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    if (duplicateDomains != null) {
        for (int i = 0; i < duplicateDomains.length; i++) {
            Domain srcDomain = bn.getDomain(duplicateDomains[i][0]);
            for (int j = 1; j < duplicateDomains[i].length; j++) {
                if (verbose)
                    System.out.println(duplicateDomains[i][j]);
                bn.bn.changeBeliefNodeDomain(bn.getNode(duplicateDomains[i][j]), srcDomain);
            }
        }
    }
}
From source file:probcog.clustering.SimpleClusterer.java
License:Open Source License
public SimpleClusterer() {
    this(new SimpleKMeans());
}
From source file:probcog.hmm.latent.SubHMM.java
License:Open Source License
public static SegmentSequence<? extends ObservationVector> learnViaClustering(
        IDwellTimeHMM<ObservationVector> hmm,
        Iterable<? extends Segment<? extends ObservationVector>> s, boolean usePseudoCounts)
        throws Exception {
    final int dim = s.iterator().next().firstElement().dimension();
    Integer numStates = hmm.getNumStates();

    // clustering
    MultiDimClusterer<?> clusterer;
    if (numStates != null)
        clusterer = new KMeansClusterer(new SimpleKMeans(), dim, numStates);
    else
        clusterer = new EMClusterer(new EM(), dim);
    for (Segment<? extends ObservationVector> seg : s)
        for (ObservationVector p : seg)
            // clusterer.addInstance(p.getArray());
            clusterer.addInstance(p.values()); // TODO slow, performs clone
    clusterer.buildClusterer();
    if (numStates == null) {
        numStates = clusterer.getWekaClusterer().numberOfClusters();
        hmm.setNumStates(numStates);
    }

    // count transitions and partition observations according to clustering
    TransitionLearner tl = new TransitionLearner(numStates, usePseudoCounts);
    DistributionLearner dl = new DistributionLearner(numStates, usePseudoCounts);
    SegmentSequence<ObservationVector> segseq = new SegmentSequence<ObservationVector>("foo");
    for (Segment<? extends ObservationVector> seg : s) {
        int prev = -1;
        for (ObservationVector p : seg) {
            // int c = clusterer.classify(p.getArray());
            int c = clusterer.classify(p.values()); // TODO inefficient, clones values
            segseq.build(c, p);
            if (prev == -1)
                dl.learn(c);
            else
                tl.learn(prev, c);
            prev = c;
        }
        segseq.buildEndSegment();
    }
    hmm.setA(tl.finish());
    hmm.setPi(dl.finish());

    // learn observation models
    for (int i = 0; i < numStates; i++) {
        hmm.learnObservationModel(i, segseq.getSegments(i));
        // System.out.printf("  sub-hmm %d: %d data points\n", i, partitions.get(i).size());
    }
    return segseq;
}
From source file:processes.ClusterProcess.java
private void initiateClusters(int randomSeedMax, int clusterCount) {
    if (!clusterInitiated) {
        dataGraph = new SimpleKMeans();
        dataGraph.setPreserveInstancesOrder(true);
        dataGraph.setSeed(randomSeedMax);
        try {
            dataGraph.setNumClusters(clusterCount);
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Error@ClusterProcess_initiateCluster", e);
            clusterCount = ((-1) * clusterCount) + 1;
            initiateClusters(randomSeedMax, clusterCount);
        }
        clusterInitiated = true;
    }
}
From source file:qoala.arff.java
public void SimpleKmeans(int numberOfCLuster) throws Exception {
    Instances train = new Instances(dataSet);

    SimpleKMeans skm = new SimpleKMeans();
    skm.setPreserveInstancesOrder(true);
    skm.setNumClusters(numberOfCLuster);
    skm.setSeed(10); // the seed must be set before buildClusterer(), or it has no effect
    skm.buildClusterer(train);

    int[] ClusterSize = skm.getClusterSizes();

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(train);
    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());

    int[] assignments = skm.getAssignments();
    System.out.println("# - cluster - distribution");
    for (int j = 0; j < skm.getNumClusters(); j++) {
        int i = 0;
        for (int clusterNum : assignments) {
            if (clusterNum == j)
                System.out.println("Instance " + i + " -> Cluster number: " + clusterNum);
            i++;
        }
    }
}
From source file:rdfsystem.data.DataMining.java
public static String cluster(RdfManager manager) throws Exception {
    Instances ins = transformData(manager, false);

    SimpleKMeans cls = new SimpleKMeans();
    String[] options = "-N 5".split(" ");
    cls.setOptions(options);
    cls.buildClusterer(ins);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(cls);
    eval.evaluateClusterer(ins);
    return eval.clusterResultsToString();
}
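The last example configures the clusterer through an option string rather than typed setters; -N is SimpleKMeans' option for the number of clusters. As a brief illustrative sketch (not from the project above), the two styles are interchangeable:

import weka.clusterers.SimpleKMeans;

public class OptionsVsSetters {
    public static void main(String[] args) throws Exception {
        SimpleKMeans viaOptions = new SimpleKMeans();
        viaOptions.setOptions(new String[] { "-N", "5" }); // option-string style

        SimpleKMeans viaSetter = new SimpleKMeans();
        viaSetter.setNumClusters(5); // equivalent typed setter
    }
}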