List of usage examples for weka.clusterers.SimpleKMeans.setOptions
@Override public void setOptions(String[] options) throws Exception
From source file:Clustering.WekaKMeansClustererWrapper.java
public ArrayList<String>[] classify(HashMap<String, List> data, boolean clearData) { ArrayList<String>[] clusterResult; try {//from www. ja va2 s . c o m File arff = m_ArffExporter.getArff(data); int nSize = data.size(); if (arff == null) return null; if (clearData) data.clear(); FileInputStream is = new FileInputStream(arff.getAbsolutePath()); Instances instances = ConverterUtils.DataSource.read(is); is.close(); String[] keys = new String[instances.numInstances()]; for (int i = 0; i < instances.numInstances(); ++i) { Instance instance = instances.instance(i); keys[i] = instance.stringValue(0); // assume that the 0th attribute is the key string } instances.deleteStringAttributes(); SimpleKMeans cl = new SimpleKMeans(); int numClusters = m_NumberOfClusters < nSize ? m_NumberOfClusters : nSize; String[] options = new String[5]; options[0] = "-O"; options[1] = "-N"; options[2] = Integer.toString(numClusters); options[3] = "-A"; options[4] = m_DistanceFunction; cl.setOptions(options); //System.out.println( "Clustering" ); cl.buildClusterer(instances); //System.out.println( "Create ArrayList" ); clusterResult = new ArrayList[m_NumberOfClusters]; for (int i = 0; i < m_NumberOfClusters; ++i) { clusterResult[i] = new ArrayList<>(); } //System.out.println( "Assigning" ); int[] assignment = cl.getAssignments(); for (int i = 0; i < assignment.length; ++i) { clusterResult[assignment[i]].add(keys[i]); } //System.out.println( "Done" ); if (!arff.delete()) arff.deleteOnExit(); } catch (Exception ex) { //System.out.println( "[EXCEPTION] " + ex.getMessage() ); m_LastErrorMessage = ex.getMessage(); return null; } return clusterResult; }
From source file:graph.clustering.NodeClusterer.java
License:Apache License
private int[] performClustering(Instances clusterTrainingSet, int numOfClusters) { String[] options = new String[7]; options[0] = "-N"; // num of clusters options[1] = String.valueOf(numOfClusters); options[2] = "-I"; // max num of iterations options[3] = "500"; options[4] = "-S"; // the random seed number options[5] = "10"; options[6] = "-O"; // preserve instance order String[] distanceOptions = new String[2]; distanceOptions[0] = "-R"; // attribute indices distanceOptions[1] = "first-last"; EuclideanDistance distanceFunc = new EuclideanDistance(); SimpleKMeans clusterer = new SimpleKMeans(); int[] assignments = null; try {//ww w. j av a 2s .c om distanceFunc.setOptions(distanceOptions); clusterer.setOptions(options); clusterer.setDistanceFunction(distanceFunc); clusterer.buildClusterer(clusterTrainingSet); assignments = clusterer.getAssignments(); } catch (Exception e1) { System.out.println("Error in clustering:"); e1.printStackTrace(); } return assignments; }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/**
 * Clusters the given instances with SimpleKMeans and returns the per-instance cluster
 * assignments computed by a {@link ClusterEvaluation} over the same instances.
 *
 * @param newInstances the instances to cluster and then evaluate
 * @param K            the number of clusters passed to {@code -N}
 * @return the cluster assignments reported by the evaluation
 * @throws Exception if option parsing, clustering or evaluation fails
 */
public static double[] doKMeans(final Instances newInstances, final int K) throws Exception {
    // NOTE(review): "-R first-last" is passed as a top-level clusterer option here;
    // confirm the Weka version in use accepts it in this position.
    final String optionLine = "-N " + K + " -R first-last -I 500 -S 10 -A weka.core.EuclideanDistance";

    final SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setOptions(Utils.splitOptions(optionLine));
    kMeans.buildClusterer(newInstances);

    final ClusterEvaluation evaluation = new ClusterEvaluation();
    evaluation.setClusterer(kMeans);
    evaluation.evaluateClusterer(newInstances);
    return evaluation.getClusterAssignments();
}
From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.simplekmeans.SimpleKMeansClusterer.java
License:Open Source License
/**
 * Clusters the dataset with SimpleKMeans using the configured number of groups and
 * wraps the per-instance cluster indices in a {@link ClusteringResult}.
 *
 * @param dataset    the instances to cluster
 * @param parameters the parameter set supplying the group count and visualization value
 * @return the clustering result, or {@code null} if clustering failed (the failure is logged)
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {
    final List<Integer> memberships = new ArrayList<Integer>();
    final SimpleKMeans kMeans = new SimpleKMeans();
    final int groupCount = parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
    final String[] kMeansOptions = { "-N", String.valueOf(groupCount) };

    try {
        kMeans.setOptions(kMeansOptions);
        kMeans.buildClusterer(dataset);

        // Ask the trained model for the cluster of every instance, in dataset order.
        for (Enumeration<?> it = dataset.enumerateInstances(); it.hasMoreElements();) {
            memberships.add(kMeans.clusterInstance((Instance) it.nextElement()));
        }

        // NOTE(review): the visualization value is read via EMClustererParameters,
        // not SimpleKMeansClustererParameters — confirm this is intended.
        return new ClusteringResult(memberships, null, kMeans.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}
From source file:rdfsystem.data.DataMining.java
public static String cluster(RdfManager manager) throws Exception { Instances ins = transformData(manager, false); SimpleKMeans cls = new SimpleKMeans(); String[] options = "-N 5".split(" "); cls.setOptions(options); cls.buildClusterer(ins);// www .j a va 2 s .c o m ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(cls); eval.evaluateClusterer(ins); return eval.clusterResultsToString(); }
From source file:soccer.core.classifiers.BookKeeperConsistencyClassifier.java
public static void main(String[] args) throws Exception { BookKeeperConsistency bkc = new BookKeeperConsistency(); Instances data = bkc.getInstances(); RemoveWithValues rwv = new RemoveWithValues(); rwv.setOptions(new String[] { "-C", "4", "-S", "6", "-V" }); rwv.setInputFormat(data);// ww w . j a v a 2 s. c om data = Filter.useFilter(data, rwv); RemoveWithValues rwv1 = new RemoveWithValues(); rwv1.setOptions(new String[] { "-C", "6", "-S", "6", "-V" }); rwv1.setInputFormat(data); data = Filter.useFilter(data, rwv1); // Normalize nm = new Normalize(); // nm.setOptions(new String[]{ // "-S", "100" // }); // nm.setInputFormat(data); // data = Filter.useFilter(data, nm); Remove rm = new Remove(); rm.setOptions(new String[] { "-R", "2-last" }); rm.setInputFormat(data); Instances newData = Filter.useFilter(data, rm); SimpleKMeans cluster = new SimpleKMeans(); cluster.setOptions(new String[] { "-N", "2", "-A", "weka.core.ManhattanDistance" }); cluster.buildClusterer(newData); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(cluster); eval.evaluateClusterer(newData); System.out.println(eval.clusterResultsToString()); // for (int i = 0; i < newData.size(); i++) { // Instance instance = newData.get(i); // if (cluster.clusterInstance(instance) == 0) { // System.out.println(data.get(i).toString()); // } // } }
From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License:Open Source License
public Instances clusteredInstances(Instances data) { if (data == null) { throw new NullPointerException("Data is null at clusteredInstances method"); }/*from w w w . ja v a2s. com*/ Instances sampled_data = data; for (int i = 0; i < sampled_data.numInstances(); i++) { sampled_data.remove(i); } SimpleKMeans sKmeans = new SimpleKMeans(); data.setClassIndex(data.numAttributes() - 1); Remove filter = new Remove(); filter.setAttributeIndices("" + (data.classIndex() + 1)); List assignments = new ArrayList(); try { filter.setInputFormat(data); Instances dataClusterer = Filter.useFilter(data, filter); String[] options = new String[3]; options[0] = "-I"; // max. iterations options[1] = "500"; options[2] = "-O"; sKmeans.setNumClusters(data.numClasses()); sKmeans.setOptions(options); sKmeans.buildClusterer(dataClusterer); System.out.println("Kmeans\n:" + sKmeans); System.out.println(Arrays.toString(sKmeans.getAssignments())); assignments = Arrays.asList(sKmeans.getAssignments()); } catch (Exception e) { e.printStackTrace(); } System.out.println("Assignments\n: " + assignments); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(sKmeans); try { eval.evaluateClusterer(data); } catch (Exception e) { e.printStackTrace(); } int classesToClustersMap[] = eval.getClassesToClusters(); for (int i = 0; i < classesToClustersMap.length; i++) { if (assignments.get(i).equals(((Integer) classesToClustersMap[(int) data.get(i).classValue()]))) { ((Instances) sampled_data).add(data.get(i)); } } return ((Instances) sampled_data); }