List of usage examples for weka.core.Instances.instance(int index)
public Instance instance(int index)
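A minimal sketch of the basic call pattern, assuming a dataset loaded with Weka's ConverterUtils.DataSource; the file name "iris.arff" is only a placeholder:

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstanceAccessExample {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        // instance(int) returns the Instance stored at the given row index.
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            System.out.println(i + ": " + inst);
        }
    }
}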
From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java
License:Open Source License
/**
 * @param folds
 * @param minAgents
 * @param maxAgents
 * @param originalDatasetPath
 * @param outputDir
 * @param scenario
 * @param logger
 */
public void splitDataset(int folds, int minAgents, int maxAgents, String originalDatasetPath,
        String outputDir, String scenario, Logger logger) {

    int ratioint = (int) ((1 / (double) folds) * 100);
    double roundedratio = ((double) ratioint) / 100;

    // Look for essentials
    List<String[]> essentials = this.getEssentials(originalDatasetPath, logger);

    for (int fold = 0; fold < folds; fold++) {
        String outputDirWithRatio = outputDir + "/" + roundedratio + "testRatio/iteration-" + fold;
        File dir = new File(outputDirWithRatio);
        if (!dir.exists() || !dir.isDirectory()) {
            dir.mkdirs();
        }

        logger.finer("--> splitDataset()");
        logger.fine("Creating experiment.info...");

        try {
            Instances originalData = this.getDataFromCSV(originalDatasetPath);
            originalData.randomize(new Random());
            originalData.stratify(folds);

            // TestDataSet
            Instances testData = originalData.testCV(folds, fold);
            CSVSaver saver = new CSVSaver();
            ArffSaver arffsaver = new ArffSaver();
            File file = new File(outputDirWithRatio + File.separator + "test-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(testData);
                saver.setFile(file);
                saver.writeBatch();
            }

            file = new File(outputDirWithRatio + File.separator + "test-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(testData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
            }

            // BayesCentralDataset
            Instances trainData = originalData.trainCV(folds, fold);
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(trainData);
                saver.setFile(file);
                saver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file,
                        new File(outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.csv"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(trainData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file,
                        new File(outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.arff"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            // Agent datasets
            CsvReader csvreader = new CsvReader(new FileReader(new File(originalDatasetPath)));
            csvreader.readHeaders();
            String[] headers = csvreader.getHeaders();
            csvreader.close();

            for (int agents = minAgents; agents <= maxAgents; agents++) {
                this.createExperimentInfoFile(folds, agents, originalDatasetPath, outputDirWithRatio,
                        scenario, logger);
                HashMap<String, CsvWriter> writers = new HashMap<String, CsvWriter>();
                String agentsDatasetsDir = outputDirWithRatio + File.separator + agents + "agents";
                HashMap<String, CsvWriter> arffWriters = new HashMap<String, CsvWriter>();
                File f = new File(agentsDatasetsDir);
                if (!f.isDirectory()) {
                    f.mkdirs();
                }
                Instances copy = new Instances(trainData);
                copy.delete();
                for (int i = 0; i < agents; i++) {
                    String fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.csv";
                    file = new File(fileName);
                    if (!file.exists()) {
                        CsvWriter writer = new CsvWriter(new FileWriter(fileName), ',');
                        writer.writeRecord(headers);
                        writers.put("AGENT" + i, writer);
                    }
                    fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.arff";
                    file = new File(fileName);
                    if (!file.exists()) {
                        arffsaver.resetOptions();
                        arffsaver.setInstances(copy);
                        arffsaver.setFile(new File(fileName));
                        arffsaver.writeBatch();
                        CsvWriter arffwriter = new CsvWriter(new FileWriter(fileName, true), ',');
                        arffWriters.put("AGENT" + i, arffwriter);
                    }
                    logger.fine("AGENT" + i + " dataset created in csv and arff formats.");
                }

                // Append essentials to all
                for (String[] essential : essentials) {
                    for (CsvWriter wr : writers.values()) {
                        wr.writeRecord(essential);
                    }
                    for (CsvWriter arffwr : arffWriters.values()) {
                        arffwr.writeRecord(essential);
                    }
                }

                int agentCounter = 0;
                for (int j = 0; j < trainData.numInstances(); j++) {
                    Instance instance = trainData.instance(j);
                    CsvWriter writer = writers.get("AGENT" + agentCounter);
                    CsvWriter arffwriter = arffWriters.get("AGENT" + agentCounter);
                    String[] row = new String[instance.numAttributes()];
                    for (int a = 0; a < instance.numAttributes(); a++) {
                        row[a] = instance.stringValue(a);
                    }
                    if (writer != null) {
                        writer.writeRecord(row);
                    }
                    if (arffwriter != null) {
                        arffwriter.writeRecord(row);
                    }
                    agentCounter++;
                    if (agentCounter == agents) {
                        agentCounter = 0;
                    }
                }

                for (CsvWriter wr : writers.values()) {
                    wr.close();
                }
                for (CsvWriter arffwr : arffWriters.values()) {
                    arffwr.close();
                }
            }
        } catch (Exception e) {
            logger.severe("Exception while splitting dataset. ->");
            logger.severe(e.getMessage());
            System.exit(1);
        }
        logger.finest("Dataset for fold " + fold + " created.");
    }
    logger.finer("<-- splitDataset()");
}
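A hedged calling sketch for the method above; the fold count, agent range, paths, and scenario name are illustrative, and a public no-argument constructor for DatasetSplitter is assumed:

DatasetSplitter splitter = new DatasetSplitter();
Logger logger = Logger.getLogger("dataset-splitter-example");
// 10 folds, agent datasets generated for 2 through 5 agents; paths are placeholders.
splitter.splitDataset(10, 2, 5, "data/original-dataset.csv", "output/splits", "scenario-1", logger);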
From source file:es.upm.dit.gsi.barmas.launcher.WekaClassifiersValidator.java
License:Open Source License
/**
 * @param cls
 * @param trainingData
 * @param testData
 * @param leba
 * @return [0] = pctCorrect, [1] = pctIncorrect
 * @throws Exception
 */
public double[] getValidation(Classifier cls, Instances trainingData, Instances testData, int leba)
        throws Exception {

    Instances testDataWithLEBA = new Instances(testData);

    for (int j = 0; j < leba; j++) {
        if (j < testDataWithLEBA.numAttributes() - 1) {
            for (int i = 0; i < testDataWithLEBA.numInstances(); i++) {
                testDataWithLEBA.instance(i).setMissing(j);
            }
        }
    }

    Evaluation eval;
    try {
        eval = new Evaluation(trainingData);
        logger.fine("Evaluating model with leba: " + leba);
        eval.evaluateModel(cls, testDataWithLEBA);

        double[] results = new double[2];
        results[0] = eval.pctCorrect() / 100;
        results[1] = eval.pctIncorrect() / 100;
        return results;
    } catch (Exception e) {
        logger.severe("Problems evaluating model for " + cls.getClass().getSimpleName());
        logger.severe(e.getMessage());
        e.printStackTrace();
        throw e;
    }
}
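A hedged calling sketch; the constructed WekaClassifiersValidator instance (validator), the pre-loaded training and test Instances, and the choice of J48 are all assumptions made for illustration:

Classifier cls = new J48();
cls.buildClassifier(trainingData);
// Mask the first 3 attributes of the test copy as missing (the "leba" parameter).
double[] results = validator.getValidation(cls, trainingData, testData, 3);
System.out.println("pctCorrect=" + results[0] + ", pctIncorrect=" + results[1]);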
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
private int[] determineDictionary(Instances instances) {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute index not valid");
    }

    // Operate on a per-class basis if class attribute is set
    int classInd = instances.classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = instances.attribute(classInd).numValues();
    }

    HashMap<String, Integer> termIndices = new HashMap<String, Integer>();
    for (int i = 0; i < termList.size(); i++) {
        termIndices.put(termList.get(i), i);
    }

    // Create the trie for matching terms
    Trie termTrie = new Trie(termList);

    // Initialize the dictionary/count map
    ArrayList<HashMap<Integer, Count>> termCounts = new ArrayList<HashMap<Integer, Count>>();
    for (int z = 0; z < values; z++) {
        termCounts.add(new HashMap<Integer, Count>());
    }

    // Go through all the instances and count the emoticons
    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Get the string attribute to examine
        String stringValue = instance.stringValue(stringAttributeIndex);

        HashMap<Integer, Count> termCountsForClass = termCounts.get(vInd);
        HashMap<String, Integer> termMatches = termTrie.countNonoverlappingMatches(stringValue);
        for (Map.Entry<String, Integer> entry : termMatches.entrySet()) {
            String term = entry.getKey();
            int termIdx = termIndices.get(term);
            int matches = entry.getValue();
            Count count = termCountsForClass.get(termIdx);
            if (count == null) {
                count = new Count(0);
                termCountsForClass.put(termIdx, count);
            }
            if (matches > 0) {
                count.docCount += 1;
                count.count += matches;
            }
        }
    }

    // Figure out the minimum required word frequency
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
        int array[] = new int[termCountsForClass.size()];
        int pos = 0;
        for (Map.Entry<Integer, Count> entry : termCountsForClass.entrySet()) {
            array[pos] = entry.getValue().count;
            pos++;
        }

        // sort the array
        sortArray(array);

        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Add the word vector attributes (eliminating duplicates that occur in multiple classes)
    HashSet<String> selectedTerms = new HashSet<String>();
    for (int z = 0; z < values; z++) {
        HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
        for (Map.Entry<Integer, Count> entry : termCountsForClass.entrySet()) {
            int termIndex = entry.getKey();
            String term = termList.get(termIndex);
            Count count = entry.getValue();
            if (count.count >= prune[z]) {
                selectedTerms.add(term);
            }
        }
    }

    // Save the selected terms as a list
    this.m_selectedTerms = new ArrayList<String>(selectedTerms);
    this.m_selectedTermsTrie = new Trie(this.m_selectedTerms);
    this.m_NumInstances = instances.size();

    // Construct the selected terms to index map
    this.m_selectedTermIndices = new HashMap<String, Integer>();
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        m_selectedTermIndices.put(m_selectedTerms.get(i), i);
    }

    // Compute document frequencies, organized by selected term index (not original term index)
    int[] docsCounts = new int[m_selectedTerms.size()];
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        String term = m_selectedTerms.get(i);
        int termIndex = termIndices.get(term);
        int docsCount = 0;
        for (int z = 0; z < values; z++) {
            HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
            Count count = termCountsForClass.get(termIndex);
            if (count != null) {
                docsCount += count.docCount;
            }
        }
        docsCounts[i] = docsCount;
    }

    return docsCounts;
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
@Override
protected Instances process(Instances instances) throws Exception {
    Instances result = new Instances(getOutputFormat(), 0);

    // Convert all instances w/o normalization
    ArrayList<Instance> converted = new ArrayList<Instance>();
    ArrayList<Double> docLengths = new ArrayList<Double>();
    if (!isFirstBatchDone()) {
        m_AvgDocLength = 0;
    }
    for (int i = 0; i < instances.size(); i++) {
        double docLength = convertInstancewoDocNorm(instances.instance(i), converted);

        // Need to compute average document length if necessary
        if (m_filterType != FILTER_NONE) {
            if (!isFirstBatchDone()) {
                m_AvgDocLength += docLength;
            }
            docLengths.add(docLength);
        }
    }
    if (m_filterType != FILTER_NONE) {
        if (!isFirstBatchDone()) {
            m_AvgDocLength /= instances.size();
        }

        // Perform normalization if necessary.
        if (isFirstBatchDone() || (!isFirstBatchDone() && m_filterType == FILTER_NORMALIZE_ALL)) {
            for (int i = 0; i < converted.size(); i++) {
                normalizeInstance(converted.get(i), docLengths.get(i));
            }
        }
    }

    // Push all instances into the output queue
    for (int i = 0; i < converted.size(); i++) {
        result.add(converted.get(i));
    }

    return result;
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
public static void main(String[] args) {
    // Create a test dataset
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("message", (ArrayList<String>) null));
    attributes.add(new Attribute("id"));
    {
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("0");
        classValues.add("1");
        attributes.add(new Attribute("class", classValues));
    }

    Instances instances = new Instances("test", attributes, 0);
    instances.setClassIndex(2);

    String[] messages = new String[] { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    for (int i = 0; i < messages.length; i++) {
        Instance instance = new DenseInstance(instances.numAttributes());
        instance.setValue(instances.attribute(0), messages[i]);
        instance.setValue(instances.attribute(1), i);
        instance.setValue(instances.attribute(2), Integer.toString(i % 2));
        instances.add(instance);
    }

    System.out.println("Before filter:");
    for (int i = 0; i < instances.size(); i++) {
        System.out.println(instances.instance(i).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        StringToDictionaryVector filter = new StringToDictionaryVector();
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");

        filter.setInputFormat(instances);
        Instances trans1 = Filter.useFilter(instances, filter);
        Instances trans2 = Filter.useFilter(instances, filter);

        System.out.println("\nFirst application:");
        System.out.println(trans1.toString());

        System.out.println("\nSecond application:");
        System.out.println(trans2.toString());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:eu.cassandra.server.mongo.csn.MongoCluster.java
License:Apache License
public DBObject clusterHierarchical(String message, String graph_id, String run_id, String clusterBasedOn,
        int numberOfClusters, String name, String clusterbasedon) {
    try {
        Instances instances = getInstances(clusterBasedOn, graph_id);
        if (instances.numInstances() < 2) {
            return new JSONtoReturn().createJSONError(message, new Exception("Number of CSN Nodes is < 2"));
        }

        HierarchicalClusterer h = new HierarchicalClusterer();
        h.setOptions(new String[] { "-L", "AVERAGE" });
        h.setDistanceFunction(new EuclideanDistance());
        if (numberOfClusters > 0)
            h.setNumClusters(numberOfClusters);
        h.buildClusterer(instances);

        HashMap<Integer, Vector<String>> clusters = new HashMap<Integer, Vector<String>>();
        double[] arr;
        for (int i = 0; i < instances.numInstances(); i++) {
            String nodeId = nodeIDs.get(i);
            arr = h.distributionForInstance(instances.instance(i));
            for (int j = 0; j < arr.length; j++) {
                if (arr[j] == 1.0) {
                    if (!clusters.containsKey(j)) {
                        Vector<String> nodes = new Vector<String>();
                        nodes.add(nodeId);
                        clusters.put(j, nodes);
                    } else {
                        Vector<String> nodes = clusters.get(j);
                        nodes.add(nodeId);
                        clusters.put(j, nodes);
                    }
                }
            }
        }
        return saveClusters(graph_id, run_id, "hierarchical", clusters, null, name, clusterbasedon);
    } catch (Exception e) {
        e.printStackTrace();
        return new JSONtoReturn().createJSONError(message, e);
    }
}
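A condensed, self-contained sketch of the same clustering pattern on synthetic two-attribute data (the attribute names and values are made up), showing how the distributionForInstance result is read per instance:

import java.util.ArrayList;

import weka.clusterers.HierarchicalClusterer;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.EuclideanDistance;
import weka.core.Instances;

public class HierarchicalClusteringSketch {
    public static void main(String[] args) throws Exception {
        // Build a tiny numeric dataset with two obvious groups of points.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("x"));
        atts.add(new Attribute("y"));
        Instances data = new Instances("points", atts, 0);
        double[][] values = { { 0.1, 0.2 }, { 0.2, 0.1 }, { 5.0, 5.1 }, { 5.2, 4.9 } };
        for (double[] v : values) {
            data.add(new DenseInstance(1.0, v));
        }

        HierarchicalClusterer h = new HierarchicalClusterer();
        h.setOptions(new String[] { "-L", "AVERAGE" });
        h.setDistanceFunction(new EuclideanDistance());
        h.setNumClusters(2);
        h.buildClusterer(data);

        // Assign each instance to the cluster with the highest membership value.
        for (int i = 0; i < data.numInstances(); i++) {
            double[] dist = h.distributionForInstance(data.instance(i));
            int cluster = 0;
            for (int j = 1; j < dist.length; j++) {
                if (dist[j] > dist[cluster]) {
                    cluster = j;
                }
            }
            System.out.println("instance " + i + " -> cluster " + cluster);
        }
    }
}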
From source file:examples.Pair.java
License:Open Source License
public static Pair<Instances, Instances> seprateTestAndTrainingSets(Instances instances, double probability) {
    Instances trainingSet = new Instances(instances, 0, 0);
    Instances testSet = new Instances(instances, 0, 0);
    Random rand = new Random();
    rand.setSeed(1L);

    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        if (rand.nextDouble() > probability) {
            testSet.add(instance);
        } else {
            trainingSet.add(instance);
        }
    }
    return new Pair<Instances, Instances>(trainingSet, testSet);
}
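An illustrative call; the data variable is assumed to be an Instances object loaded elsewhere, and the method is assumed to be invoked statically on the examples.Pair class where it is declared:

// Roughly 70% of the rows land in the training set, the rest in the test set.
Pair<Instances, Instances> split = Pair.seprateTestAndTrainingSets(data, 0.7);

Per the return statement above, the first element of the pair is the training set and the second is the test set.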
From source file:expshell.NeuralClassifier.java
@Override
public void buildClassifier(Instances i) throws Exception {
    List<Integer> numNodes = new ArrayList<Integer>();
    //numNodes.add(5);
    //numNodes.add(6);
    numNodes.add(i.numClasses());
    nn = new NeuralNetwork(numNodes.size(), i.numAttributes() - 1, numNodes);
    for (int j = 0; j < i.numInstances(); j++) {
        System.out.println(nn.run(i.instance(j)));
    }
}
From source file:fantail.algorithms.AbstractRanker.java
License:Open Source License
public static double[] getAvgRankValues(Instances data) throws Exception {
    if (data.numInstances() == 0) {
        throw new Exception("data can't be empty.");
    }
    int numLabels = Tools.getNumberTargets(data);
    double[] avgVals = new double[numLabels];

    for (int m = 0; m < data.numInstances(); m++) {
        Instance inst = data.instance(m);
        double[] targetValues = Tools.getTargetVector(inst);
        for (int j = 0; j < targetValues.length; j++) {
            avgVals[j] += (targetValues[j] * inst.weight());
        }
    }
    for (int i = 0; i < avgVals.length; i++) {
        avgVals[i] /= data.numInstances();
    }
    return avgVals;
}
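A hedged call sketch; data is assumed to be a ranking dataset in fantail's format, since Tools.getNumberTargets and Tools.getTargetVector in the snippet above come from that project:

double[] avgRanks = AbstractRanker.getAvgRankValues(data);
for (int j = 0; j < avgRanks.length; j++) {
    System.out.println("label " + j + ": average (weighted) rank value = " + avgRanks[j]);
}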
From source file:fantail.algorithms.AverageRanking.java
License:Open Source License
@Override
public void buildRanker(Instances data) throws Exception {
    Instances workingData = new Instances(data);
    int numLabels = Tools.getNumberTargets(workingData);
    m_DefRanking = new double[numLabels];

    for (int m = 0; m < workingData.numInstances(); m++) {
        Instance inst = workingData.instance(m);
        double[] targetValues = Tools.getTargetVector(inst);
        for (int j = 0; j < targetValues.length; j++) {
            m_DefRanking[j] += (targetValues[j]);
        }
    }
}