List of usage examples for weka.clusterers Clusterer buildClusterer
void buildClusterer(Instances data) throws Exception;
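Before the project-specific examples below, here is a minimal, self-contained sketch of the typical call pattern: load a dataset, configure a clusterer, call buildClusterer, then query it. It assumes a Weka 3.7+ style API (where Instances is iterable); the ARFF path is a placeholder.

import weka.clusterers.SimpleKMeans;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class BuildClustererExample {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path; any ARFF file works,
        // as long as no class attribute is set when clustering
        Instances data = DataSource.read("data.arff");

        SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3);

        // buildClusterer must be called before clusterInstance/numberOfClusters
        kMeans.buildClusterer(data);

        for (Instance inst : data) {
            System.out.println(kMeans.clusterInstance(inst));
        }
    }
}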
From source file:adams.flow.transformer.WekaTrainClusterer.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances data;
    Instance inst;
    weka.clusterers.Clusterer cls;
    WekaModelContainer cont;

    result = null;

    try {
        cls = null;
        if ((m_InputToken != null) && (m_InputToken.getPayload() instanceof Instances)) {
            // batch training: build the clusterer on the full dataset
            cls = getClustererInstance();
            data = (Instances) m_InputToken.getPayload();
            cls.buildClusterer(data);
            cont = new WekaModelContainer(cls, new Instances(data, 0), data);
            cont = m_PostProcessor.postProcess(cont);
            m_OutputToken = new Token(cont);
        } else if ((m_InputToken != null) && (m_InputToken.getPayload() instanceof Instance)) {
            // incremental training: requires an UpdateableClusterer
            if (m_IncrementalClusterer == null) {
                cls = getClustererInstance();
                if (!(cls instanceof UpdateableClusterer))
                    result = m_Clusterer + "/" + cls.getClass().getName() + " is not an incremental clusterer!";
            }
            if (result == null) {
                inst = (Instance) m_InputToken.getPayload();
                if (m_IncrementalClusterer == null) {
                    // first instance: initialize the clusterer with a single-instance dataset
                    m_IncrementalClusterer = cls;
                    data = new Instances(inst.dataset(), 1);
                    data.add((Instance) inst.copy());
                    m_IncrementalClusterer.buildClusterer(data);
                } else {
                    // subsequent instances: update the existing model
                    ((UpdateableClusterer) m_IncrementalClusterer).updateClusterer(inst);
                    ((UpdateableClusterer) m_IncrementalClusterer).updateFinished();
                }
                m_OutputToken = new Token(
                        new WekaModelContainer(m_IncrementalClusterer, new Instances(inst.dataset(), 0)));
            }
        }
    } catch (Exception e) {
        m_OutputToken = null;
        result = handleException("Failed to process input: " + m_InputToken.getPayload(), e);
    }

    if (m_OutputToken != null)
        updateProvenance(m_OutputToken);

    return result;
}
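The incremental branch above depends on Weka's UpdateableClusterer interface. A standalone sketch of the same pattern follows, assuming Cobweb (one of the stock clusterers that implements the interface) and a placeholder ARFF path:

import weka.clusterers.Cobweb;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class IncrementalClustererExample {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // placeholder path

        Cobweb cobweb = new Cobweb(); // Cobweb implements UpdateableClusterer

        // initialize with the dataset structure only (no instances),
        // analogous to the single-instance initialization above
        cobweb.buildClusterer(new Instances(data, 0));

        for (Instance inst : data) {
            cobweb.updateClusterer(inst);
        }
        cobweb.updateFinished(); // signal that no more updates will come
    }
}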
From source file:adams.flow.transformer.WekaTrainTestSetClustererEvaluator.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances train;
    Instances test;
    weka.clusterers.Clusterer cls;
    ClusterEvaluation eval;
    WekaTrainTestSetContainer cont;

    result = null;

    try {
        // evaluate the clusterer on a train/test split
        cls = getClustererInstance();
        if (cls == null)
            throw new IllegalStateException("Clusterer '" + getClusterer() + "' not found!");
        cont = (WekaTrainTestSetContainer) m_InputToken.getPayload();
        train = (Instances) cont.getValue(WekaTrainTestSetContainer.VALUE_TRAIN);
        test = (Instances) cont.getValue(WekaTrainTestSetContainer.VALUE_TEST);
        cls.buildClusterer(train);
        eval = new ClusterEvaluation();
        eval.setClusterer(cls);
        eval.evaluateClusterer(test, null, m_OutputModel);

        // broadcast result
        m_OutputToken = new Token(new WekaClusterEvaluationContainer(eval, cls));
    } catch (Exception e) {
        m_OutputToken = null;
        result = handleException("Failed to evaluate: ", e);
    }

    if (m_OutputToken != null)
        updateProvenance(m_OutputToken);

    return result;
}
From source file:adams.ml.model.clustering.WekaClusterer.java
License:Open Source License
/**
 * Builds a model from the data.
 *
 * @param data   the data to use for building the model
 * @return       the generated model
 * @throws Exception  if the build fails
 */
@Override
protected ClusteringModel doBuildModel(Dataset data) throws Exception {
    Instances inst;
    weka.clusterers.Clusterer clusterer;

    inst = WekaConverter.toInstances(data);
    clusterer = (weka.clusterers.Clusterer) OptionUtils.shallowCopy(m_Clusterer);
    if (clusterer == null)
        throw new Exception(
                "Failed to create shallow copy of clusterer: " + OptionUtils.getCommandLine(m_Clusterer));
    clusterer.buildClusterer(inst);

    return new WekaClusteringModel(clusterer, data, inst);
}
From source file:com.actelion.research.orbit.imageAnalysis.tasks.TrainWorker.java
License:Open Source License
private void createClusterer() {
    int MAX_TILES_CLUSTERING = 50;
    if (iFrames == null || iFrames.size() < 1) {
        logger.error("cannot build clusterer, no open image frames.");
        return;
    }
    if (modelToBuild != null && modelToBuild.getClassifier() != null)
        modelToBuild.getClassifier().setBuild(false);
    int windowSize = modelToBuild.getFeatureDescription().getWindowSize();
    List<double[]> trainData = new ArrayList<double[]>();
    for (ImageFrame iFrame : iFrames) {
        PlanarImage image = iFrame.recognitionFrame.bimg.getImage();
        TissueFeatures tissueFeatures = new TissueFeatures(modelToBuild.getFeatureDescription(),
                iFrame.recognitionFrame.bimg);
        Point[] tileArr = image.getTileIndices(null);
        if (tileArr.length > MAX_TILES_CLUSTERING) {
            logger.trace("number of tiles for clustering: " + tileArr.length);
            List<Point> pList = new ArrayList<Point>(tileArr.length);
            for (Point p : tileArr)
                pList.add(p);
            Collections.shuffle(pList);
            pList = pList.subList(0, MAX_TILES_CLUSTERING);
            tileArr = pList.toArray(new Point[0]);
            logger.trace("number of tiles after tile limit: " + tileArr.length);
        }
        for (Point tileNum : tileArr) {
            Raster r = image.getTile(tileNum.x, tileNum.y);
            for (int x = image.tileXToX(tileNum.x); x < Math
                    .min(image.tileXToX(tileNum.x) + image.getTileWidth(), image.getWidth()); x++) {
                for (int y = image.tileYToY(tileNum.y); y < Math
                        .min(image.tileYToY(tileNum.y) + image.getTileHeight(), image.getHeight()); y++) {
                    if ((x < r.getMinX() + windowSize) || (y < r.getMinY() + windowSize)
                            || (x > r.getMinX() + r.getWidth() - windowSize - 1)
                            || (y > r.getMinY() + r.getHeight() - windowSize - 1))
                        continue;
                    double[] feats = null;
                    try {
                        feats = tissueFeatures.buildFeatures(r, x, y, Double.NaN);
                    } catch (Throwable t) {
                        System.out.println(t.getMessage());
                        t.printStackTrace();
                    }
                    trainData.add(feats);
                } // y
                checkPaused();
                if (isCancelled()) {
                    cleanUp();
                    return;
                }
            } // x
        } // tileNum
    } // iFrames
    timeEst = 1000 * 60L;
    setProgress(20);

    // trainData -> instances
    checkPaused();
    if (isCancelled()) {
        cleanUp();
        return;
    }
    trainSet = null;
    Attribute classAttr = null;
    // create the first time a new trainSet. All further trainings will append new instances.
    if (trainSet == null) {
        // build traindata header
        double[] firstRow = trainData.get(0);
        ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(firstRow.length);
        for (int a = 0; a < firstRow.length - 1; a++) {
            Attribute attr = new Attribute("a" + a);
            // if (a<firstRow.length-2) attr.setWeight(0.1d); else attr.setWeight(1.0d);
            attrInfo.add(attr);
        }
        List<String> classValues = new ArrayList<String>(
                iFrames.get(0).recognitionFrame.getClassShapes().size());
        for (int i = 0; i < iFrames.get(0).recognitionFrame.getClassShapes().size(); i++) {
            classValues.add((i + 1) + ".0"); // "1.0", "2.0", ...
        }
        classAttr = new Attribute("class", classValues);
        attrInfo.add(classAttr);
        trainSet = new Instances("trainSet pattern classes", attrInfo, trainData.size());
        trainSet.setClassIndex(firstRow.length - 1);
    } else
        classAttr = trainSet.attribute("class");
    timeEst = 1000 * 45L;
    setProgress(25);

    // add instances
    checkPaused();
    if (isCancelled()) {
        cleanUp();
        return;
    }
    for (double[] vals : trainData) {
        double classV = Double.NaN;
        vals[vals.length - 1] = classV;
        Instance inst = new DenseInstance(1.0d, vals);
        trainSet.add(inst);
    }
    trainSet = trainSet.resample(rand);
    trainSet.setClassIndex(-1);
    Instances ts = new Instances(trainSet, 0);
    ts.addAll(trainSet.subList(0, Math.min(MAX_CLUSTERING_EXAMPLES, trainSet.size() - 1)));
    trainSet = null;
    trainSet = ts;
    logger.debug("trainSet contains " + trainSet.numInstances() + " instances, class Attribute: "
            + trainSet.classIndex());
    logger.info("start building clusterer...");
    timeEst = 1000 * 40L;
    setProgress(30);

    // build clusterer
    checkPaused();
    if (isCancelled()) {
        cleanUp();
        return;
    }
    // Clusterer clusterer = new weka.clusterers.SimpleKMeans();
    // Clusterer clusterer = new MakeDensityBasedClusterer(new SimpleKMeans());
    Clusterer clusterer = new EM();
    try {
        // ((weka.clusterers.SimpleKMeans) clusterer).setNumClusters(iFrames.get(0).recognitionFrame.getClassShapes().size());
        // ((MakeDensityBasedClusterer) clusterer).setNumClusters(iFrames.get(0).recognitionFrame.getClassShapes().size());
        ((EM) clusterer).setNumClusters(iFrames.get(0).recognitionFrame.getClassShapes().size());
        clusterer.buildClusterer(trainSet);
    } catch (Exception e) {
        logger.error(
                "cannot build clusterer or cannot set number of clusters (classShapes not correctly initialized?)");
        e.printStackTrace();
    }
    logger.info(
            "done. (clusterer is densityBasedClusterer: " + (clusterer instanceof DensityBasedClusterer) + ")");

    // sort class labels according to priors
    classifier = new ClassifierWrapper(clusterer);
    classifier.setBuild(true);
    this.trainSet = trainSet.stringFreeStructure();
    modelToBuild.setClassifier(classifier);
    modelToBuild.setStructure(trainSet.stringFreeStructure());
}
From source file:com.rapidminer.operator.learner.clustering.clusterer.GenericWekaClusteringAdaptor.java
License:Open Source License
public ClusterModel createClusterModel(ExampleSet exampleSet) throws OperatorException {
    weka.clusterers.Clusterer clusterer = getWekaClusterer(
            WekaTools.getWekaParametersFromTypes(this, wekaParameters));
    weka.core.Instances instances = WekaTools.toWekaInstances(exampleSet, "ClusterInstances",
            WekaInstancesAdaptor.CLUSTERING);
    try {
        clusterer.buildClusterer(instances);
        WekaCluster wekaCluster = new WekaCluster(exampleSet, clusterer);
        exampleSet = wekaCluster.apply(exampleSet);
    } catch (Exception e) {
        throw new UserError(this, e, 905, new Object[] { getOperatorClassName(), e });
    }
    ClusterModel clusterModel = createWekaBasedClusterModel(exampleSet);
    return clusterModel;
}
From source file:core.ClusterEvaluationEX.java
License:Open Source License
/**
 * Evaluates a clusterer with the options given in an array of strings. It
 * takes the string indicated by "-t" as training file, the string indicated
 * by "-T" as test file. If the test file is missing, a stratified ten-fold
 * cross-validation is performed (distribution clusterers only). Using "-x"
 * you can change the number of folds to be used, and using "-s" the random
 * seed. If the "-p" option is present it outputs the classification for each
 * test instance. If you provide the name of an object file using "-l", a
 * clusterer will be loaded from the given file. If you provide the name of
 * an object file using "-d", the clusterer built from the training data will
 * be saved to the given file.
 *
 * @param clusterer machine learning clusterer
 * @param options the array of string containing the options
 * @throws Exception if model could not be evaluated successfully
 * @return a string describing the results
 */
public static String evaluateClusterer(Clusterer clusterer, String[] options) throws Exception {
    int seed = 1, folds = 10;
    boolean doXval = false;
    Instances train = null;
    Random random;
    String trainFileName, testFileName, seedString, foldsString;
    String objectInputFileName, objectOutputFileName, attributeRangeString;
    String graphFileName;
    String[] savedOptions = null;
    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering
    boolean updateable = (clusterer instanceof UpdateableClusterer);
    DataSource source = null;
    Instance inst;

    if (Utils.getFlag('h', options) || Utils.getFlag("help", options)) {
        // global info requested as well?
        boolean globalInfo = Utils.getFlag("synopsis", options) || Utils.getFlag("info", options);
        throw new Exception("Help requested." + makeOptionString(clusterer, globalInfo));
    }

    try {
        // Get basic options (options the same for all clusterers)
        // printClusterAssignments = Utils.getFlag('p', options);
        objectInputFileName = Utils.getOption('l', options);
        objectOutputFileName = Utils.getOption('d', options);
        trainFileName = Utils.getOption('t', options);
        testFileName = Utils.getOption('T', options);
        graphFileName = Utils.getOption('g', options);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClusterAssignments = true;
            if (!attributeRangeString.equals("0"))
                attributesToOutput = new Range(attributeRangeString);
        }

        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object input file given.");
            }
            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test file given.");
            }
        } else {
            if ((objectInputFileName.length() != 0) && (printClusterAssignments == false)) {
                throw new Exception("Can't use both train and model file unless -p specified.");
            }
        }

        seedString = Utils.getOption('s', options);
        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }
        foldsString = Utils.getOption('x', options);
        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
            doXval = true;
        }
    } catch (Exception e) {
        throw new Exception('\n' + e.getMessage() + makeOptionString(clusterer, false));
    }

    try {
        if (trainFileName.length() != 0) {
            source = new DataSource(trainFileName);
            train = source.getStructure();

            String classString = Utils.getOption('c', options);
            if (classString.length() != 0) {
                if (classString.compareTo("last") == 0)
                    theClass = train.numAttributes();
                else if (classString.compareTo("first") == 0)
                    theClass = 1;
                else
                    theClass = Integer.parseInt(classString);

                if (theClass != -1) {
                    if (doXval || testFileName.length() != 0)
                        throw new Exception("Can only do class based evaluation on the training data");
                    if (objectInputFileName.length() != 0)
                        throw new Exception("Can't load a clusterer and do class based evaluation");
                    if (objectOutputFileName.length() != 0)
                        throw new Exception("Can't do class based evaluation and save clusterer");
                }
            } else {
                // if the dataset defines a class attribute, use it
                if (train.classIndex() != -1) {
                    theClass = train.classIndex() + 1;
                    System.err.println("Note: using class attribute from dataset, i.e., attribute #" + theClass);
                }
            }

            if (theClass != -1) {
                if (theClass < 1 || theClass > train.numAttributes())
                    throw new Exception("Class is out of range!");
                if (!train.attribute(theClass - 1).isNominal())
                    throw new Exception("Class must be nominal!");
                train.setClassIndex(theClass - 1);
            }
        }
    } catch (Exception e) {
        throw new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options
    if (options != null) {
        savedOptions = new String[options.length];
        System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0)
        Utils.checkForRemainingOptions(options);

    // Set options for clusterer
    if (clusterer instanceof OptionHandler)
        ((OptionHandler) clusterer).setOptions(options);

    Utils.checkForRemainingOptions(options);

    Instances trainHeader = train;
    if (objectInputFileName.length() != 0) {
        // Load the clusterer from file
        // clusterer = (Clusterer) SerializationHelper.read(objectInputFileName);
        java.io.ObjectInputStream ois = new java.io.ObjectInputStream(
                new java.io.BufferedInputStream(new java.io.FileInputStream(objectInputFileName)));
        clusterer = (Clusterer) ois.readObject();
        // try and get the training header
        try {
            trainHeader = (Instances) ois.readObject();
        } catch (Exception ex) {
            // don't moan if we cant
        }
    } else {
        // Build the clusterer if no object file provided
        if (theClass == -1) {
            if (updateable) {
                clusterer.buildClusterer(source.getStructure());
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    ((UpdateableClusterer) clusterer).updateClusterer(inst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                clusterer.buildClusterer(source.getDataSet());
            }
        } else {
            Remove removeClass = new Remove();
            removeClass.setAttributeIndices("" + theClass);
            removeClass.setInvertSelection(false);
            removeClass.setInputFormat(train);
            if (updateable) {
                Instances clusterTrain = Filter.useFilter(train, removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    removeClass.input(inst);
                    removeClass.batchFinished();
                    Instance clusterTrainInst = removeClass.output();
                    ((UpdateableClusterer) clusterer).updateClusterer(clusterTrainInst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
            }
            ClusterEvaluationEX ce = new ClusterEvaluationEX();
            ce.setClusterer(clusterer);
            ce.evaluateClusterer(train, trainFileName);

            return "\n\n=== Clustering stats for training data ===\n\n" + ce.clusterResultsToString();
        }
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data) */
    if (printClusterAssignments) {
        return printClusterings(clusterer, trainFileName, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append("\n\n=== Clustering stats for training data ===\n\n"
            + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
        // check header compatibility
        DataSource test = new DataSource(testFileName);
        Instances testStructure = test.getStructure();
        if (!trainHeader.equalHeaders(testStructure)) {
            throw new Exception("Training and testing data are not compatible\n");
        }
        text.append("\n\n=== Clustering stats for testing data ===\n\n"
                + printClusterStats(clusterer, testFileName));
    }

    if ((clusterer instanceof DensityBasedClusterer) && (doXval == true) && (testFileName.length() == 0)
            && (objectInputFileName.length() == 0)) {
        // cross validate the log likelihood on the training data
        random = new Random(seed);
        random.setSeed(seed);
        train = source.getDataSet();
        train.randomize(random);
        text.append(crossValidateModel(clusterer.getClass().getName(), train, folds, savedOptions, random));
    }

    // Save the clusterer if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        // SerializationHelper.write(objectOutputFileName, clusterer);
        saveClusterer(objectOutputFileName, clusterer, trainHeader);
    }

    // If classifier is drawable output string describing graph
    if ((clusterer instanceof Drawable) && (graphFileName.length() != 0)) {
        BufferedWriter writer = new BufferedWriter(new FileWriter(graphFileName));
        writer.write(((Drawable) clusterer).graph());
        writer.newLine();
        writer.flush();
        writer.close();
    }

    return text.toString();
}
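ClusterEvaluationEX is a project-specific extension, but its option handling mirrors Weka's stock weka.clusterers.ClusterEvaluation. A minimal sketch of driving that evaluation API with the "-t" option documented above (the file name is a placeholder):

import weka.clusterers.ClusterEvaluation;
import weka.clusterers.EM;

public class EvaluateClustererExample {
    public static void main(String[] args) throws Exception {
        // -t names the training file; "train.arff" is a placeholder path
        String[] options = { "-t", "train.arff" };
        String results = ClusterEvaluation.evaluateClusterer(new EM(), options);
        System.out.println(results);
    }
}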
From source file:de.unidue.langtech.grading.tc.ClusteringTask.java
License:Open Source License
@Override
public void execute(TaskContext aContext) throws Exception {
    if (learningMode.equals(Constants.LM_MULTI_LABEL)) {
        throw new IllegalArgumentException("Cannot use multi-label setup in clustering.");
    }
    boolean multiLabel = false;

    File arffFileTrain = new File(
            aContext.getStorageLocation(TEST_TASK_INPUT_KEY_TRAINING_DATA, AccessMode.READONLY).getPath()
                    + "/" + TRAINING_DATA_FILENAME);

    Instances trainData = TaskUtils.getInstances(arffFileTrain, multiLabel);

    // get number of outcomes
    List<String> trainOutcomeValues = TaskUtils.getClassLabels(trainData, multiLabel);

    Clusterer clusterer = AbstractClusterer.forName(clusteringArguments.get(0),
            clusteringArguments.subList(1, clusteringArguments.size()).toArray(new String[0]));

    Instances copyTrainData = new Instances(trainData);
    trainData = WekaUtils.removeOutcomeId(trainData, multiLabel);

    // generate data for clusterer (w/o class)
    Remove filter = new Remove();
    filter.setAttributeIndices("" + (trainData.classIndex() + 1));
    filter.setInputFormat(trainData);
    Instances clusterTrainData = Filter.useFilter(trainData, filter);

    clusterer.buildClusterer(clusterTrainData);

    // get a mapping from clusterIDs to instance offsets in the ARFF
    Map<Integer, Set<Integer>> clusterMap = getClusterMap(clusterTrainData, clusterer);

    Map<String, String> instanceId2TextMap = getInstanceId2TextMap(aContext);

    ConditionalFrequencyDistribution<Integer, String> clusterAssignments =
            new ConditionalFrequencyDistribution<Integer, String>();
    for (Integer clusterId : clusterMap.keySet()) {
        System.out.println("CLUSTER: " + clusterId);
        for (Integer offset : clusterMap.get(clusterId)) {
            // get instance ID from instance
            Instance instance = copyTrainData.get(offset);
            Double classOffset = new Double(instance.value(copyTrainData.classAttribute()));
            String label = (String) trainOutcomeValues.get(classOffset.intValue());
            clusterAssignments.addSample(clusterId, label);

            String instanceId = instance
                    .stringValue(copyTrainData.attribute(AddIdFeatureExtractor.ID_FEATURE_NAME).index());
            System.out.println(label + "\t" + instanceId2TextMap.get(instanceId));
        }
        System.out.println();
    }

    System.out.println("ID\tSIZE\tPURITY\tRMSE");
    for (Integer clusterId : clusterMap.keySet()) {
        FrequencyDistribution<String> fd = clusterAssignments.getFrequencyDistribution(clusterId);
        double purity = (double) fd.getCount(fd.getSampleWithMaxFreq()) / fd.getN();
        String purityString = String.format("%.2f", purity);
        double rmse = getRMSE(fd, trainOutcomeValues);
        String rmseString = String.format("%.2f", rmse);
        System.out.println(clusterId + "\t" + clusterMap.get(clusterId).size() + "\t" + purityString + "\t"
                + rmseString);
    }
    System.out.println();
}
From source file:de.unidue.langtech.grading.tc.ClusterTrainTask.java
License:Open Source License
@Override
public void execute(TaskContext aContext) throws Exception {
    if (learningMode.equals(Constants.LM_MULTI_LABEL)) {
        throw new IllegalArgumentException("Cannot use multi-label setup in clustering.");
    }
    boolean multiLabel = false;

    File arffFileTrain = new File(
            aContext.getStorageLocation(TEST_TASK_INPUT_KEY_TRAINING_DATA, AccessMode.READONLY).getPath()
                    + "/" + TRAINING_DATA_FILENAME);

    Instances trainData = TaskUtils.getInstances(arffFileTrain, multiLabel);

    // get number of outcomes
    List<String> trainOutcomeValues = TaskUtils.getClassLabels(trainData, multiLabel);

    Clusterer clusterer = AbstractClusterer.forName(clusteringArguments.get(0),
            clusteringArguments.subList(1, clusteringArguments.size()).toArray(new String[0]));

    Instances copyTrainData = new Instances(trainData);
    trainData = WekaUtils.removeOutcomeId(trainData, multiLabel);

    // generate data for clusterer (w/o class)
    Remove filter = new Remove();
    filter.setAttributeIndices("" + (trainData.classIndex() + 1));
    filter.setInputFormat(trainData);
    Instances clusterTrainData = Filter.useFilter(trainData, filter);

    clusterer.buildClusterer(clusterTrainData);

    // get a mapping from clusterIDs to instance offsets in the ARFF
    Map<Integer, Set<Integer>> clusterMap = getClusterMap(clusterTrainData, clusterer);

    // get a CFD that stores the number of outcomes for each class indexed by the clusterID
    ConditionalFrequencyDistribution<Integer, String> clusterCfd = getClusterCfd(clusterMap, copyTrainData,
            trainOutcomeValues);

    Map<Integer, String> mostFrequentClassPerCluster = new HashMap<Integer, String>();
    Map<Integer, Double> clusterScoreMap = new HashMap<Integer, Double>();
    for (Integer clusterId : clusterMap.keySet()) {
        FrequencyDistribution<String> fd = clusterCfd.getFrequencyDistribution(clusterId);
        mostFrequentClassPerCluster.put(clusterId, fd.getSampleWithMaxFreq());

        double purity = (double) fd.getCount(fd.getSampleWithMaxFreq()) / fd.getN();
        // attention - cannot simply use RMSE here - as smaller values are better unlike with purity
        // double rmse = getRMSE(fd, trainOutcomeValues);
        clusterScoreMap.put(clusterId, purity);
    }

    // sort clusters by score
    Map<Integer, Double> sortedClusters = new TreeMap<Integer, Double>(new ValueComparator(clusterScoreMap));
    sortedClusters.putAll(clusterScoreMap);

    // change the outcome values of instances according to the most frequent class in its cluster
    double avgPurity = 0.0;
    int n = 0;

    for (Integer clusterId : sortedClusters.keySet()) {
        // we need to take as many clusters until we have seen at least each class once
        if (onlyPureClusters && trainOutcomeValues.size() == 0) {
            break;
        }

        // // do not use clusters of single responses, as they always have purity of 1
        // if (clusterCfd.getFrequencyDistribution(clusterId).getN() == 1) {
        //     continue;
        // }

        n++;
        avgPurity += clusterScoreMap.get(clusterId);

        String mostFrequentClass = mostFrequentClassPerCluster.get(clusterId);
        trainOutcomeValues.remove(mostFrequentClass);

        for (Integer instanceOffset : clusterMap.get(clusterId)) {
            copyTrainData.get(instanceOffset).setValue(copyTrainData.classIndex(), mostFrequentClass);
        }
    }
    avgPurity = avgPurity / n;
    System.out.println("Average cluster purity: " + avgPurity);

    // write the new training data (that will be used by the test task instead of the original one)
    DataSink.write(aContext.getStorageLocation(ADAPTED_TRAINING_DATA, AccessMode.READWRITE).getPath() + "/"
            + ARFF_FILENAME, copyTrainData);
}
From source file:guineu.modules.dataanalysis.clustering.em.EMClusterer.java
License:Open Source License
public List<Integer> getClusterGroups(Instances dataset) {
    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    Clusterer clusterer = new EM();

    int numberOfIterations = parameters.getParameter(EMClustererParameters.numberOfIterations).getValue();
    options[0] = "-I";
    options[1] = String.valueOf(numberOfIterations);

    try {
        ((EM) clusterer).setOptions(options);
        clusterer.buildClusterer(dataset);

        Enumeration e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }
        this.numberOfGroups = clusterer.numberOfClusters();
    } catch (Exception ex) {
        Logger.getLogger(EMClusterer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return clusters;
}
From source file:guineu.modules.dataanalysis.clustering.farthestfirst.FarthestFirstClusterer.java
License:Open Source License
public List<Integer> getClusterGroups(Instances dataset) {
    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    Clusterer clusterer = new FarthestFirst();

    int numberOfGroups = parameters.getParameter(FarthestFirstClustererParameters.numberOfGroups).getValue();
    options[0] = "-N";
    options[1] = String.valueOf(numberOfGroups);

    try {
        ((FarthestFirst) clusterer).setOptions(options);
        clusterer.buildClusterer(dataset);

        Enumeration e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }
        this.numberOfGroups = clusterer.numberOfClusters();
    } catch (Exception ex) {
        Logger.getLogger(FarthestFirstClusterer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return clusters;
}