List of usage examples for weka.core Instances numInstances
public int numInstances()
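Before the examples, a minimal, self-contained sketch of the call itself (the ARFF path is a placeholder): numInstances() returns the number of rows currently in the dataset, so it is the usual loop bound when iterating with instance(int).

    // Minimal sketch: load a dataset and use numInstances() as the loop bound.
    // "data.arff" is a placeholder path.
    import weka.core.Instances;
    import weka.core.converters.ConverterUtils.DataSource;

    public class NumInstancesDemo {
        public static void main(String[] args) throws Exception {
            Instances data = DataSource.read("data.arff");
            System.out.println("dataset size: " + data.numInstances());
            for (int i = 0; i < data.numInstances(); i++) {
                System.out.println(data.instance(i));
            }
        }
    }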
From source file:entity.NoiseInjectionManager.java
License:Open Source License
/**
 * Increments fp and fn by the specified percentages.
 * Randomizes the order of the instances and modifies them until the noise quota is reached,
 * then randomizes the instances again.
 * NOTE: it modifies the given dataset, because it is passed by reference.
 *
 * @param origDataset
 * @param fpPercentage
 * @param fnPercentage
 * @return Instances noisyDataset
 */
public Instances addNoiseToDataset(Instances origDataset, BigDecimal fpPercentage, BigDecimal fnPercentage) {
    // exit if no noise must be added
    if (fnPercentage.equals(BigDecimal.ZERO) && fpPercentage.equals(BigDecimal.ZERO)) {
        if (verbose)
            System.out.println("[NoiseManager , addNoiseToDataset] no noise to add");
        return origDataset;
    }
    // total instances in dataset
    int numInstances = origDataset.numInstances();
    // count the positive (buggy) and negative (non-buggy) instances
    int numOfPositives = 0;
    int numOfNegatives = 0;
    for (int j = 0; j < numInstances; j++) {
        if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) {
            numOfPositives++;
        }
        // this is a redundant check, but better safe than sorry
        else if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel)) {
            numOfNegatives++;
        }
    }
    // calculate the number of false positives to insert
    int fpToInsert = (int) Math.round(numOfNegatives * fpPercentage.doubleValue() / 100);
    int fpInserted = 0;
    if (verbose)
        System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpToInsert= " + fpToInsert + ", totInstances= "
                + origDataset.numInstances() + " true negatives= " + numOfNegatives + " %fp= " + fpPercentage);
    // calculate the number of false negatives to insert
    int fnToInsert = (int) Math.round(numOfPositives * fnPercentage.doubleValue() / 100);
    int fnInserted = 0;
    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] fnToInsert= " + fnToInsert + ", totInstances= "
                + origDataset.numInstances() + " true positives= " + numOfPositives + " %fn= " + fnPercentage);
    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel
                + " - nonbuggy label: " + Settings.nonbuggyLabel);
    // randomize the order of the instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    for (int i = 0; i < origDataset.numInstances(); i++) {
        if (verbose)
            System.out.print("\nORIGINAL VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
        // get the class attribute (it HAS to be the last one)
        Attribute att = origDataset.instance(i).attribute(origDataset.classIndex());
        // if there are fn left to add and this is a positive instance, turn it into a negative, making it a fn
        if ((fnInserted < fnToInsert)
                && (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel))) {
            origDataset.instance(i).setValue(att, Settings.nonbuggyLabel);
            fnInserted++;
            if (verbose)
                System.out.print(" - added FN, added " + fnInserted + " of " + fnToInsert + " ");
        }
        // if there are fp left to add and this is a negative instance, turn it into a positive, making it a fp
        else if ((fpInserted < fpToInsert)
                && (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel))) {
            origDataset.instance(i).setValue(att, Settings.buggyLabel);
            fpInserted++;
            if (verbose)
                System.out.print(" - added FP, added " + fpInserted + " of " + fpToInsert + " ");
        }
        if (verbose)
            System.out.print(" FINAL ELEMENT VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
    }
    // randomize the order of the instances again
    origDataset.randomize(RandomizationManager.randomGenerator);
    return origDataset;
}
From source file:entity.NoiseInjectionManager.java
License:Open Source License
/**
 * Increments fp and fn in combination by a specified percentage.
 * Randomizes the order of the instances and modifies them until the noise quota is reached,
 * then randomizes the instances again.
 * NOTE: it modifies the given dataset, because it is passed by reference.
 *
 * @param origDataset
 * @param combinedFpFnPercentage
 * @return noisyData
 */
public Instances addNoiseToDataset(Instances origDataset, BigDecimal combinedFpFnPercentage) {
    // exit if no noise must be added
    if (combinedFpFnPercentage.equals(BigDecimal.ZERO)) {
        if (verbose)
            System.out.println("[NoiseManager , addNoiseToDataset] no noise to add");
        return origDataset;
    }
    // total instances in dataset
    int numInstances = origDataset.numInstances();
    // calculate the combined number of false positives and false negatives to insert
    int fpAndFnToInsert = (int) Math.round(numInstances * combinedFpFnPercentage.doubleValue() / 100);
    int fpAndFnInserted = 0;
    if (verbose)
        System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpAndFnToInsert= " + fpAndFnToInsert
                + ", totInstances= " + origDataset.numInstances());
    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel
                + " - nonbuggy label: " + Settings.nonbuggyLabel);
    // randomize the order of the instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    for (int i = 0; i < origDataset.numInstances(); i++) {
        if (verbose)
            System.out.print("\nORIGINAL VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
        // get the class attribute (it HAS to be the last one)
        Attribute att = origDataset.instance(i).attribute(origDataset.classIndex());
        // if there are fn or fp left to add
        if (fpAndFnInserted < fpAndFnToInsert) {
            // if this is a positive instance, turn it into a negative, making it a fn
            if (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) {
                if (verbose)
                    System.out.print(" - added FN, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " ");
                origDataset.instance(i).setValue(att, Settings.nonbuggyLabel);
                fpAndFnInserted++;
            }
            // if this is a negative instance, turn it into a positive, making it a fp
            else if (origDataset.instance(i).stringValue(origDataset.classIndex())
                    .equals(Settings.nonbuggyLabel)) {
                if (verbose)
                    System.out.print(" - added FP, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " ");
                origDataset.instance(i).setValue(att, Settings.buggyLabel);
                fpAndFnInserted++;
            }
        }
        if (verbose)
            System.out.print(" FINAL ELEMENT VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
    }
    // randomize the order of the instances again
    origDataset.randomize(RandomizationManager.randomGenerator);
    return origDataset;
}
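A hypothetical call sketch for the injector above. The no-argument constructor and the prior configuration of Settings and RandomizationManager are assumptions; only addNoiseToDataset and the Weka calls come from the source.

    // Hypothetical usage sketch; constructor and configuration are assumptions.
    Instances data = weka.core.converters.ConverterUtils.DataSource.read("defects.arff");
    data.setClassIndex(data.numAttributes() - 1); // the class label must be the last attribute
    NoiseInjectionManager manager = new NoiseInjectionManager();
    // flip 10% of the instances' class labels (fp and fn combined)
    Instances noisy = manager.addNoiseToDataset(data, new BigDecimal(10));
    System.out.println("instances after injection: " + noisy.numInstances());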
From source file:ergasia2pkg.LP_ROS.java
/**
 * Groups instances by their label sets.
 *
 * @param mlData the labeled multi-label instances
 * @return HashMap<String, List<Instance>> with the grouping
 */
public HashMap groupByLabelSet(MultiLabelInstances mlData) {
    Instances inst = mlData.getDataSet();
    Set<Attribute> atts = mlData.getLabelAttributes();
    HashMap LabelSetGroups = new HashMap<String, List<Instance>>();
    for (int i = 0; i < inst.numInstances(); i++) {
        Instance in = inst.get(i);
        String labelsetName = "";
        for (Attribute att : atts) {
            if (in.value(att) != 0) {
                labelsetName = labelsetName + att.name();
            }
        }
        if (LabelSetGroups.containsKey(labelsetName)) {
            List myList = (List) LabelSetGroups.get(labelsetName);
            myList.add(in);
            LabelSetGroups.put(labelsetName, myList);
        } else {
            List<Instance> myList = new ArrayList<Instance>();
            myList.add(in);
            LabelSetGroups.put(labelsetName, myList);
        }
    }
    return LabelSetGroups;
}
From source file:es.jarias.FMC.ClassCompoundTransformation.java
License:Open Source License
/**
 * @param mlData
 * @return the transformed instances
 * @throws Exception
 */
public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    data = mlData.getDataSet();
    numLabels = mlData.getNumLabels();
    labelIndices = mlData.getLabelIndices();
    Instances newData = null;

    // This must be different in order to combine ALL class states, not only existing ones.
    // gather distinct label combinations
    // ASSUME CLASSES ARE BINARY
    ArrayList<LabelSet> labelSets = new ArrayList<LabelSet>();
    double[] dblLabels = new double[numLabels];
    double nCombinations = Math.pow(2, numLabels);
    for (int i = 0; i < nCombinations; i++) {
        for (int l = 0; l < numLabels; l++) {
            int digit = (int) Math.pow(2, numLabels - 1 - l);
            dblLabels[l] = (digit & i) / digit;
        }
        LabelSet labelSet = new LabelSet(dblLabels);
        labelSets.add(labelSet);
    }

    // for (int i = 0; i < numInstances; i++) {
    //     // construct labelset
    //     double[] dblLabels = new double[numLabels];
    //     for (int j = 0; j < numLabels; j++) {
    //         int index = labelIndices[j];
    //         dblLabels[j] = Double.parseDouble(data.attribute(index).value((int) data.instance(i).value(index)));
    //     }
    //     LabelSet labelSet = new LabelSet(dblLabels);
    //     // add labelset if not already present
    //     labelSets.add(labelSet);
    // }

    // create class attribute
    ArrayList<String> classValues = new ArrayList<String>(labelSets.size());
    for (LabelSet subset : labelSets) {
        classValues.add(subset.toBitString());
    }
    newClass = new Attribute("class", classValues);

    // remove all labels
    newData = RemoveAllLabels.transformInstances(data, labelIndices);

    // add new class attribute
    newData.insertAttributeAt(newClass, newData.numAttributes());
    newData.setClassIndex(newData.numAttributes() - 1);

    // add class values
    for (int i = 0; i < newData.numInstances(); i++) {
        String strClass = "";
        for (int j = 0; j < numLabels; j++) {
            int index = labelIndices[j];
            strClass = strClass + data.attribute(index).value((int) data.instance(i).value(index));
        }
        newData.instance(i).setClassValue(strClass);
    }
    transformedFormat = new Instances(newData, 0);
    return newData;
}
From source file:es.jarias.FMC.FMC.java
License:Open Source License
public static double[][] mutualInfo(Instances data, int[] indexes) {
    double[][] m_counts = new double[indexes.length][];
    double[][][] m_2counts = new double[indexes.length][indexes.length][];
    double[] nValues = new double[indexes.length];
    double[][] I = new double[indexes.length][indexes.length];

    for (int i = 0; i < indexes.length; i++) {
        nValues[i] = data.attribute(indexes[i]).numValues();
        m_counts[i] = new double[(int) nValues[i]];
    }
    for (int i = 0; i < indexes.length; i++) {
        for (int j = 0; j < indexes.length; j++) {
            if (i != j) {
                double cardinality = nValues[i] * nValues[j];
                m_2counts[i][j] = new double[(int) cardinality];
            }
        }
    }

    // Compute counts:
    for (Instance d : data) {
        for (int i = 0; i < indexes.length; i++) {
            m_counts[i][(int) d.value(indexes[i])]++;
            for (int j = 0; j < indexes.length; j++) {
                if (i != j) {
                    int index = (int) (d.value(indexes[j]) * nValues[i] + d.value(indexes[i]));
                    m_2counts[i][j][index]++;
                }
            }
        }
    }

    // Calculate MI(X_i; X_j)
    for (int i = 0; i < indexes.length; i++) {
        for (int j = 0; j < indexes.length; j++) {
            if (i != j) {
                double mi = 0.0;
                for (int v_i = 0; v_i < nValues[i]; v_i++) {
                    for (int v_j = 0; v_j < nValues[j]; v_j++) {
                        if ((1.0 * data.numInstances() * m_2counts[i][j][(int) (v_j * nValues[i] + v_i)])
                                / (1.0 * m_counts[i][v_i] * m_counts[j][v_j]) > 0)
                            mi += m_2counts[i][j][(int) (v_j * nValues[i] + v_i)]
                                    * Math.log((1.0 * data.numInstances()
                                            * m_2counts[i][j][(int) (v_j * nValues[i] + v_i)])
                                            / (1.0 * m_counts[i][v_i] * m_counts[j][v_j]));
                    }
                }
                I[i][j] = mi / data.numInstances();
            }
        }
    }
    return I;
}
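A hypothetical usage sketch for mutualInfo. It assumes the attributes at the given indexes are nominal (the value-counting logic indexes arrays by attribute value), and the ARFF path is a placeholder.

    // Hypothetical usage sketch; "data.arff" is a placeholder and the
    // attributes at indexes 0-2 are assumed to be nominal.
    Instances data = weka.core.converters.ConverterUtils.DataSource.read("data.arff");
    double[][] mi = mutualInfo(data, new int[] { 0, 1, 2 });
    System.out.println("MI between attributes 0 and 1: " + mi[0][1]);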
From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java
License:Open Source License
/**
 * @param folds
 * @param minAgents
 * @param maxAgents
 * @param originalDatasetPath
 * @param outputDir
 * @param scenario
 * @param logger
 */
public void splitDataset(int folds, int minAgents, int maxAgents, String originalDatasetPath, String outputDir,
        String scenario, Logger logger) {

    int ratioint = (int) ((1 / (double) folds) * 100);
    double roundedratio = ((double) ratioint) / 100;

    // Look for essentials
    List<String[]> essentials = this.getEssentials(originalDatasetPath, logger);

    for (int fold = 0; fold < folds; fold++) {
        String outputDirWithRatio = outputDir + "/" + roundedratio + "testRatio/iteration-" + fold;
        File dir = new File(outputDirWithRatio);
        if (!dir.exists() || !dir.isDirectory()) {
            dir.mkdirs();
        }

        logger.finer("--> splitDataset()");
        logger.fine("Creating experiment.info...");

        try {
            Instances originalData = this.getDataFromCSV(originalDatasetPath);
            originalData.randomize(new Random());
            originalData.stratify(folds);

            // TestDataSet
            Instances testData = originalData.testCV(folds, fold);
            CSVSaver saver = new CSVSaver();
            ArffSaver arffsaver = new ArffSaver();
            File file = new File(outputDirWithRatio + File.separator + "test-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(testData);
                saver.setFile(file);
                saver.writeBatch();
            }

            file = new File(outputDirWithRatio + File.separator + "test-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(testData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
            }

            // BayesCentralDataset
            Instances trainData = originalData.trainCV(folds, fold);
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(trainData);
                saver.setFile(file);
                saver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file,
                        new File(outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.csv"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(trainData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file,
                        new File(outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.arff"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            // Agent datasets
            CsvReader csvreader = new CsvReader(new FileReader(new File(originalDatasetPath)));
            csvreader.readHeaders();
            String[] headers = csvreader.getHeaders();
            csvreader.close();

            for (int agents = minAgents; agents <= maxAgents; agents++) {
                this.createExperimentInfoFile(folds, agents, originalDatasetPath, outputDirWithRatio, scenario,
                        logger);
                HashMap<String, CsvWriter> writers = new HashMap<String, CsvWriter>();
                String agentsDatasetsDir = outputDirWithRatio + File.separator + agents + "agents";
                HashMap<String, CsvWriter> arffWriters = new HashMap<String, CsvWriter>();
                File f = new File(agentsDatasetsDir);
                if (!f.isDirectory()) {
                    f.mkdirs();
                }
                Instances copy = new Instances(trainData);
                copy.delete();
                for (int i = 0; i < agents; i++) {
                    String fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.csv";
                    file = new File(fileName);
                    if (!file.exists()) {
                        CsvWriter writer = new CsvWriter(new FileWriter(fileName), ',');
                        writer.writeRecord(headers);
                        writers.put("AGENT" + i, writer);
                    }
                    fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.arff";
                    file = new File(fileName);
                    if (!file.exists()) {
                        arffsaver.resetOptions();
                        arffsaver.setInstances(copy);
                        arffsaver.setFile(new File(fileName));
                        arffsaver.writeBatch();
                        CsvWriter arffwriter = new CsvWriter(new FileWriter(fileName, true), ',');
                        arffWriters.put("AGENT" + i, arffwriter);
                    }
                    logger.fine("AGENT" + i + " dataset created in csv and arff formats.");
                }

                // Append essentials to all
                for (String[] essential : essentials) {
                    for (CsvWriter wr : writers.values()) {
                        wr.writeRecord(essential);
                    }
                    for (CsvWriter arffwr : arffWriters.values()) {
                        arffwr.writeRecord(essential);
                    }
                }

                int agentCounter = 0;
                for (int j = 0; j < trainData.numInstances(); j++) {
                    Instance instance = trainData.instance(j);
                    CsvWriter writer = writers.get("AGENT" + agentCounter);
                    CsvWriter arffwriter = arffWriters.get("AGENT" + agentCounter);
                    String[] row = new String[instance.numAttributes()];
                    for (int a = 0; a < instance.numAttributes(); a++) {
                        row[a] = instance.stringValue(a);
                    }
                    if (writer != null) {
                        writer.writeRecord(row);
                    }
                    if (arffwriter != null) {
                        arffwriter.writeRecord(row);
                    }
                    agentCounter++;
                    if (agentCounter == agents) {
                        agentCounter = 0;
                    }
                }
                for (CsvWriter wr : writers.values()) {
                    wr.close();
                }
                for (CsvWriter arffwr : arffWriters.values()) {
                    arffwr.close();
                }
            }
        } catch (Exception e) {
            logger.severe("Exception while splitting dataset. ->");
            logger.severe(e.getMessage());
            System.exit(1);
        }
        logger.finest("Dataset for fold " + fold + " created.");
    }
    logger.finer("<-- splitDataset()");
}
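The fold bookkeeping at the core of splitDataset() is standard Weka cross-validation plumbing. A minimal sketch of just that part, with a placeholder path and seed:

    // Minimal sketch of the stratified k-fold split this method builds on.
    // "data.arff" and the seed are placeholders.
    Instances data = weka.core.converters.ConverterUtils.DataSource.read("data.arff");
    data.setClassIndex(data.numAttributes() - 1);
    int folds = 10;
    data.randomize(new java.util.Random(42));
    data.stratify(folds); // requires a nominal class attribute
    for (int fold = 0; fold < folds; fold++) {
        Instances train = data.trainCV(folds, fold);
        Instances test = data.testCV(folds, fold);
        System.out.println("fold " + fold + ": " + train.numInstances() + " train / "
                + test.numInstances() + " test");
    }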
From source file:es.upm.dit.gsi.barmas.launcher.WekaClassifiersValidator.java
License:Open Source License
/**
 * @param cls
 * @param trainingData
 * @param testData
 * @param leba
 * @return [0] = pctCorrect, [1] = pctIncorrect
 * @throws Exception
 */
public double[] getValidation(Classifier cls, Instances trainingData, Instances testData, int leba)
        throws Exception {

    Instances testDataWithLEBA = new Instances(testData);

    for (int j = 0; j < leba; j++) {
        if (j < testDataWithLEBA.numAttributes() - 1) {
            for (int i = 0; i < testDataWithLEBA.numInstances(); i++) {
                testDataWithLEBA.instance(i).setMissing(j);
            }
        }
    }

    Evaluation eval;
    try {
        eval = new Evaluation(trainingData);
        logger.fine("Evaluating model with leba: " + leba);
        eval.evaluateModel(cls, testDataWithLEBA);
        double[] results = new double[2];
        results[0] = eval.pctCorrect() / 100;
        results[1] = eval.pctIncorrect() / 100;
        return results;
    } catch (Exception e) {
        logger.severe("Problems evaluating model for " + cls.getClass().getSimpleName());
        logger.severe(e.getMessage());
        e.printStackTrace();
        throw e;
    }
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
private int[] determineDictionary(Instances instances) {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute index not valid");
    }

    // Operate on a per-class basis if class attribute is set
    int classInd = instances.classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = instances.attribute(classInd).numValues();
    }

    HashMap<String, Integer> termIndices = new HashMap<String, Integer>();
    for (int i = 0; i < termList.size(); i++) {
        termIndices.put(termList.get(i), i);
    }

    // Create the trie for matching terms
    Trie termTrie = new Trie(termList);

    // Initialize the dictionary/count map
    ArrayList<HashMap<Integer, Count>> termCounts = new ArrayList<HashMap<Integer, Count>>();
    for (int z = 0; z < values; z++) {
        termCounts.add(new HashMap<Integer, Count>());
    }

    // Go through all the instances and count the emoticons
    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Get the string attribute to examine
        String stringValue = instance.stringValue(stringAttributeIndex);

        HashMap<Integer, Count> termCountsForClass = termCounts.get(vInd);

        HashMap<String, Integer> termMatches = termTrie.countNonoverlappingMatches(stringValue);
        for (Map.Entry<String, Integer> entry : termMatches.entrySet()) {
            String term = entry.getKey();
            int termIdx = termIndices.get(term);
            int matches = entry.getValue();
            Count count = termCountsForClass.get(termIdx);
            if (count == null) {
                count = new Count(0);
                termCountsForClass.put(termIdx, count);
            }
            if (matches > 0) {
                count.docCount += 1;
                count.count += matches;
            }
        }
    }

    // Figure out the minimum required word frequency
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
        int array[] = new int[termCountsForClass.size()];
        int pos = 0;
        for (Map.Entry<Integer, Count> entry : termCountsForClass.entrySet()) {
            array[pos] = entry.getValue().count;
            pos++;
        }

        // sort the array
        sortArray(array);

        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Add the word vector attributes (eliminating duplicates
    // that occur in multiple classes)
    HashSet<String> selectedTerms = new HashSet<String>();
    for (int z = 0; z < values; z++) {
        HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
        for (Map.Entry<Integer, Count> entry : termCountsForClass.entrySet()) {
            int termIndex = entry.getKey();
            String term = termList.get(termIndex);
            Count count = entry.getValue();
            if (count.count >= prune[z]) {
                selectedTerms.add(term);
            }
        }
    }

    // Save the selected terms as a list
    this.m_selectedTerms = new ArrayList<String>(selectedTerms);
    this.m_selectedTermsTrie = new Trie(this.m_selectedTerms);
    this.m_NumInstances = instances.size();

    // Construct the selected terms to index map
    this.m_selectedTermIndices = new HashMap<String, Integer>();
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        m_selectedTermIndices.put(m_selectedTerms.get(i), i);
    }

    // Compute document frequencies, organized by selected term index (not original term index)
    int[] docsCounts = new int[m_selectedTerms.size()];
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        String term = m_selectedTerms.get(i);
        int termIndex = termIndices.get(term);
        int docsCount = 0;
        for (int z = 0; z < values; z++) {
            HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
            Count count = termCountsForClass.get(termIndex);
            if (count != null) {
                docsCount += count.docCount;
            }
        }
        docsCounts[i] = docsCount;
    }
    return docsCounts;
}
From source file:eu.cassandra.server.mongo.csn.MongoCluster.java
License:Apache License
/**
 * @param message
 * @param graph_id
 * @param run_id
 * @param clusterBasedOn
 * @param numberOfClusters
 * @param name
 * @param clusterbasedon
 * @return
 */
private DBObject clusterKmeans(String message, String graph_id, String run_id, String clusterBasedOn,
        int numberOfClusters, String name, String clusterbasedon) {
    try {
        Instances instances = getInstances(clusterBasedOn, graph_id);
        if (instances.numInstances() < 2) {
            return new JSONtoReturn().createJSONError(message, new Exception("Number of CSN Nodes is < 2"));
        }

        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed((int) Calendar.getInstance().getTimeInMillis());

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numberOfClusters);
        kmeans.buildClusterer(instances);

        // This array returns the cluster number (starting with 0) for each instance
        // The array has as many elements as the number of instances
        int[] assignments = kmeans.getAssignments();

        int i = 0;
        HashMap<Integer, Vector<String>> clusters = new HashMap<Integer, Vector<String>>();
        for (int clusterNum : assignments) {
            if (clusters.containsKey(clusterNum)) {
                Vector<String> cluster = clusters.get(clusterNum);
                cluster.add(nodeIDs.get(i));
                clusters.put(clusterNum, cluster);
            } else {
                Vector<String> cluster = new Vector<String>();
                cluster.add(nodeIDs.get(i));
                clusters.put(clusterNum, cluster);
            }
            i++;
        }
        nodeIDs.clear();
        return saveClusters(graph_id, run_id, "kmeans", clusters, null, name, clusterbasedon);
    } catch (Exception e) {
        e.printStackTrace();
        return new JSONtoReturn().createJSONError(message, e);
    }
}
From source file:eu.cassandra.server.mongo.csn.MongoCluster.java
License:Apache License
public DBObject clusterHierarchical(String message, String graph_id, String run_id, String clusterBasedOn,
        int numberOfClusters, String name, String clusterbasedon) {
    try {
        Instances instances = getInstances(clusterBasedOn, graph_id);
        if (instances.numInstances() < 2) {
            return new JSONtoReturn().createJSONError(message, new Exception("Number of CSN Nodes is < 2"));
        }

        HierarchicalClusterer h = new HierarchicalClusterer();
        h.setOptions(new String[] { "-L", "AVERAGE" });
        h.setDistanceFunction(new EuclideanDistance());
        if (numberOfClusters > 0)
            h.setNumClusters(numberOfClusters);
        h.buildClusterer(instances);

        HashMap<Integer, Vector<String>> clusters = new HashMap<Integer, Vector<String>>();
        double[] arr;
        for (int i = 0; i < instances.numInstances(); i++) {
            String nodeId = nodeIDs.get(i);
            arr = h.distributionForInstance(instances.instance(i));
            for (int j = 0; j < arr.length; j++) {
                if (arr[j] == 1.0) {
                    if (!clusters.containsKey(j)) {
                        Vector<String> nodes = new Vector<String>();
                        nodes.add(nodeId);
                        clusters.put(j, nodes);
                    } else {
                        Vector<String> nodes = clusters.get(j);
                        nodes.add(nodeId);
                        clusters.put(j, nodes);
                    }
                }
            }
        }
        return saveClusters(graph_id, run_id, "hierarchical", clusters, null, name, clusterbasedon);
    } catch (Exception e) {
        e.printStackTrace();
        return new JSONtoReturn().createJSONError(message, e);
    }
}