List of usage examples for weka.clusterers XMeans clusterInstance
@Override public int clusterInstance(Instance instance) throws Exception
From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.XMeansClusteringStrategy.java
License:Apache License
/** * {@inheritDoc}//from ww w. j a va2s.c o m * * <p> * This method is specialized for <b>xmeans</b> clustering. */ @Override public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute, final UseCaseRepository useCaseRepository) { final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer(); // Behavior Mix to be returned; final BehaviorMix behaviorMix = this.createBehaviorMix(); try { // Returns a valid instances set, generated based on the absolut // behavior models Instances instances = getInstances(behaviorModelsAbsolute); // XMeans --> Weka XMeans xmeans = new XMeans(); if (CommandLineArgumentsHandler.getSeedValue() != null) { xmeans.setSeed(Integer.parseInt(CommandLineArgumentsHandler.getSeedValue())); } // distance function DistanceFunction euclideanDistance = new EuclideanDistance(); // String[] options = new String[1]; // options[0] = "-D"; // euclideanDistance.setOptions(options); euclideanDistance.setInstances(instances); xmeans.setDistanceF(euclideanDistance); // DistanceFunction manhattanDistance = new ManhattanDistance(); // String[] options = new String[1]; // options[0] = "-D"; // manhattanDistance.setOptions(options); // manhattanDistance.setInstances(instances); // xmeans.setDistanceF(manhattanDistance); int[] clustersize = null; // create new assignments int[] assignments = new int[instances.numInstances()]; // get number of clusters to be generated. int numberOfClustersMin = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin()); int numberOfClustersMax = 0; if (CommandLineArgumentsHandler.getNumberOfClustersMax() != "") { numberOfClustersMax = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMax()); } else { numberOfClustersMax = numberOfClustersMin; } // clustering xmeans.setMinNumClusters(numberOfClustersMin); xmeans.setMaxNumClusters(numberOfClustersMax); // build cluster xmeans.buildClusterer(instances); ClusterEvaluation clusterEvaluation = new ClusterEvaluation(); clusterEvaluation.setClusterer(xmeans); clusterEvaluation.evaluateClusterer(instances); // clusterSize clustersize = new int[xmeans.getClusterCenters().numInstances()]; // set assignments and clustersize for (int s = 0; s < instances.numInstances(); s++) { assignments[s] = xmeans.clusterInstance(instances.instance(s)); clustersize[xmeans.clusterInstance(instances.instance(s))]++; } ClusteringMetrics clusteringMetrics = new ClusteringMetrics(); clusteringMetrics.calculateInterClusteringSimilarity(xmeans.getClusterCenters()); clusteringMetrics.calculateIntraClusteringSimilarity(xmeans.getClusterCenters(), instances, assignments); clusteringMetrics.calculateBetas(); clusteringMetrics.printErrorMetricsHeader(); clusteringMetrics.printErrorMetrics(xmeans.getClusterCenters().numInstances()); clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances); // clusteringMetrics.printClusterAssignmentsToSession(assignments, // xmeans.getClusterCenters().numInstances()); Instances resultingCentroids = xmeans.getClusterCenters(); // for each centroid instance, create new behaviorModelRelative for (int i = 0; i < resultingCentroids.numInstances(); i++) { Instance centroid = resultingCentroids.instance(i); // create a Behavior Model, which includes all vertices only; // the vertices are associated with the use cases, and a // dedicated // vertex that represents the final state will be added; final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases()); // install the transitions in between vertices; this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid, assignments, i); // convert absolute to relative behaviorModel final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer .transform(behaviorModelAbsoluteCentroid); // relative Frequency of cluster i double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances(); // create the (unique) Behavior Mix entry to be returned; final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry( AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency; behaviorModelRelative); // add to resulting behaviorMix behaviorMix.getEntries().add(behaviorMixEntry); } return behaviorMix; } catch (ExtractionException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } // if any error occurs, an ExtractionExeption should be thrown, // indicating the error that occurred; // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy" // should give an idea for handling the Behavior Models and how to // use the helping methods of the (abstract) parent class. return behaviorMix; }
From source file:org.iobserve.analysis.behavior.clustering.xmeans.XMeansClustering.java
License:Apache License
private Optional<ClusteringResults> getClusteringResults(final Instances instances) { final XMeans xMeansClusterer = new XMeans(); xMeansClusterer.setSeed(new Random().nextInt(Integer.MAX_VALUE)); xMeansClusterer.setDistanceF(this.distanceMetric); xMeansClusterer.setMinNumClusters(this.minClusters); xMeansClusterer.setMaxNumClusters(this.maxClusters); try {//from w ww . ja v a 2 s .co m xMeansClusterer.buildClusterer(instances); /** * Code used from org.iobserve.analysis.userbehavior.XMeansClustering to use * org.iobserve.analysis.userbehavior.ClusteringResults */ int[] clustersize = null; final int[] assignments = new int[instances.numInstances()]; clustersize = new int[xMeansClusterer.getClusterCenters().numInstances()]; for (int s = 0; s < instances.numInstances(); s++) { assignments[s] = xMeansClusterer.clusterInstance(instances.instance(s)); clustersize[xMeansClusterer.clusterInstance(instances.instance(s))]++; } final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xMeansClusterer.getClusterCenters(), instances, assignments); clusteringMetrics.calculateSimilarityMetrics(); final ClusteringResults xMeansClusteringResults = new ClusteringResults("X-Means", xMeansClusterer.getClusterCenters().numInstances(), assignments, clusteringMetrics); return Optional.of(xMeansClusteringResults); } catch (final Exception e) { // NOPMD NOCS api dependency XMeansClustering.LOGGER.error("Clustering failed.", e); } return Optional.empty(); }
From source file:org.iobserve.analysis.behavior.karlsruhe.XMeansClustering.java
License:Apache License
/** * * @param instances//from w ww.j a v a 2 s . co m * data to cluster in Weka format * @param numberOfUserGroupsFromInputUsageModel * is the input number of clusters * @param varianceOfUserGroups * enables the creation of a minimum and maximum number of clusters * @param seed * states a random determination of the initial centroids * @return the clustering results that contain the number of cluster and the assignments */ public ClusteringResults clusterSessionsWithXMeans(final Instances instances, final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) { ClusteringResults xMeansClusteringResults = null; try { final XMeans xmeans = new XMeans(); xmeans.setSeed(seed); final NormalizableDistance manhattenDistance = new ManhattanDistance(); manhattenDistance.setDontNormalize(false); manhattenDistance.setInstances(instances); xmeans.setDistanceF(manhattenDistance); int[] clustersize = null; final int[] assignments = new int[instances.numInstances()]; // Determines the range of clusters // The X-Means clustering algorithm determines the best fitting number of clusters // within this range by itself int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups; int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups; if (numberOfClustersMax < 2) { numberOfClustersMax = 1; numberOfClustersMin = 1; } else { if (numberOfClustersMin < 2) { numberOfClustersMin = 2; } } xmeans.setMinNumClusters(numberOfClustersMin); xmeans.setMaxNumClusters(numberOfClustersMax); xmeans.buildClusterer(instances); clustersize = new int[xmeans.getClusterCenters().numInstances()]; for (int s = 0; s < instances.numInstances(); s++) { assignments[s] = xmeans.clusterInstance(instances.instance(s)); clustersize[xmeans.clusterInstance(instances.instance(s))]++; } final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances, assignments); clusteringMetrics.calculateSimilarityMetrics(); xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(), assignments, clusteringMetrics); } catch (final Exception e) { // NOPMD NOCS due to broken xmeans implementation triggering // Exception e.printStackTrace(); } return xMeansClusteringResults; }
From source file:org.iobserve.analysis.userbehavior.XMeansClustering.java
License:Apache License
/** * * @param instances//from w w w . j a va2 s.c o m * data to cluster in Weka format * @param numberOfUserGroupsFromInputUsageModel * is the input number of clusters * @param varianceOfUserGroups * enables the creation of a minimum and maximum number of clusters * @param seed * states a random determination of the initial centroids * @return the clustering results that contain the number of cluster and the assignments */ public ClusteringResults clusterSessionsWithXMeans(final Instances instances, final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) { ClusteringResults xMeansClusteringResults = null; try { final XMeans xmeans = new XMeans(); xmeans.setSeed(seed); final NormalizableDistance manhattenDistance = new ManhattanDistance(); manhattenDistance.setDontNormalize(false); manhattenDistance.setInstances(instances); xmeans.setDistanceF(manhattenDistance); int[] clustersize = null; final int[] assignments = new int[instances.numInstances()]; // Determines the range of clusters // The X-Means clustering algorithm determines the best fitting number of clusters // within this range by itself int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups; int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups; if (numberOfClustersMax < 2) { numberOfClustersMax = 1; numberOfClustersMin = 1; } else { if (numberOfClustersMin < 2) { numberOfClustersMin = 2; } } xmeans.setMinNumClusters(numberOfClustersMin); xmeans.setMaxNumClusters(numberOfClustersMax); xmeans.buildClusterer(instances); clustersize = new int[xmeans.getClusterCenters().numInstances()]; for (int s = 0; s < instances.numInstances(); s++) { assignments[s] = xmeans.clusterInstance(instances.instance(s)); clustersize[xmeans.clusterInstance(instances.instance(s))]++; } final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances, assignments); clusteringMetrics.calculateSimilarityMetrics(); xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(), assignments, clusteringMetrics); } catch (final Exception e) { // NOCS due to broken xmeans implementation triggering // Exception e.printStackTrace(); } return xMeansClusteringResults; }
From source file:org.montp2.m1decol.ter.clustering.XMeansClustering.java
License:Open Source License
public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster) throws Exception { Instances inputInstances = WekaUtils.loadARFF(inPath); EuclideanDistance euclideanDistance = new EuclideanDistance(); euclideanDistance.setAttributeIndices("first-last"); euclideanDistance.setDontNormalize(false); euclideanDistance.setInvertSelection(false); XMeans xmeans = new XMeans(); xmeans.setMaxIterations(500);//from w w w .ja va2s . c o m xmeans.setSeed(10); xmeans.setMinNumClusters(5); xmeans.setMaxNumClusters(12); xmeans.setMaxKMeans(1000); xmeans.setMaxKMeansForChildren(1000); xmeans.setBinValue(1.0); xmeans.setCutOffFactor(0.5); xmeans.setDebugLevel(0); xmeans.setMaxIterations(1); xmeans.buildClusterer(inputInstances); Enumeration<Instance> e = inputInstances.enumerateInstances(); while (e.hasMoreElements()) { Instance ins = e.nextElement(); int cluster_num = xmeans.clusterInstance(ins); System.out.println(ins.toString()); System.out.println(cluster_num); } WekaUtils.saveModel(xmeans, outPath); return xmeans; }
From source file:qoala.arff.java
public void XMenas() throws Exception { Instances train = new Instances(dataSet); XMeans xm = new XMeans(); xm.setMaxNumClusters(100);//from www . j a va2 s .c om xm.setMinNumClusters(2); xm.buildClusterer(train); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(xm); eval.evaluateClusterer(train); eval.getNumClusters(); System.out.println("Cluster Evaluation:" + eval.clusterResultsToString()); System.out.println("# - cluster - distribution"); for (int j = 0; j < eval.getNumClusters(); j++) { for (int i = 0; i < train.numInstances(); i++) { int cluster = xm.clusterInstance(train.instance(i)); if (cluster == j) System.out.println("Instance " + i + " -> Cluster number: " + cluster); } } }
From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License:Open Source License
public static Instances clusterInstances(Instances data) { XMeans xmeans = new XMeans(); Remove filter = new Remove(); Instances dataClusterer = null;//w w w. j a va2 s.c o m if (data == null) { throw new NullPointerException("Data is null at clusteredInstances method"); } //Get the attributes from the data for creating the sampled_data object ArrayList<Attribute> attrList = new ArrayList<Attribute>(); Enumeration attributes = data.enumerateAttributes(); while (attributes.hasMoreElements()) { attrList.add((Attribute) attributes.nextElement()); } Instances sampled_data = new Instances(data.relationName(), attrList, 0); data.setClassIndex(data.numAttributes() - 1); sampled_data.setClassIndex(data.numAttributes() - 1); filter.setAttributeIndices("" + (data.classIndex() + 1)); data.remove(0);//In Wavelet Stream of MOA always the first element comes without class try { filter.setInputFormat(data); dataClusterer = Filter.useFilter(data, filter); String[] options = new String[4]; options[0] = "-L"; // max. iterations options[1] = Integer.toString(noOfClassesInPool - 1); if (noOfClassesInPool > 2) { options[1] = Integer.toString(noOfClassesInPool - 1); xmeans.setMinNumClusters(noOfClassesInPool - 1); } else { options[1] = Integer.toString(noOfClassesInPool); xmeans.setMinNumClusters(noOfClassesInPool); } xmeans.setMaxNumClusters(data.numClasses() + 1); System.out.println("No of classes in the pool: " + noOfClassesInPool); xmeans.setUseKDTree(true); //xmeans.setOptions(options); xmeans.buildClusterer(dataClusterer); System.out.println("Xmeans\n:" + xmeans); } catch (Exception e) { e.printStackTrace(); } //System.out.println("Assignments\n: " + assignments); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(xmeans); try { eval.evaluateClusterer(data); int classesToClustersMap[] = eval.getClassesToClusters(); //check the classes to cluster map int clusterNo = 0; for (int i = 0; i < data.size(); i++) { clusterNo = xmeans.clusterInstance(dataClusterer.get(i)); //Check if the class value of instance and class value of cluster matches if ((int) data.get(i).classValue() == classesToClustersMap[clusterNo]) { sampled_data.add(data.get(i)); } } } catch (Exception e) { e.printStackTrace(); } return ((Instances) sampled_data); }