List of usage examples for weka.core.Instance.numAttributes()
public int numAttributes();
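Before the per-project examples, a minimal sketch of the basic call pattern. The dataset path "iris.arff" is a placeholder, and the snippet assumes a dataset loaded through the standard converter utilities:

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumAttributesDemo {
    public static void main(String[] args) throws Exception {
        // Load any ARFF file; the path is a placeholder.
        Instances data = DataSource.read("iris.arff");
        Instance first = data.instance(0);
        // numAttributes() counts every attribute of the instance,
        // including the class attribute if one is set.
        for (int i = 0; i < first.numAttributes(); i++) {
            System.out.println(first.attribute(i).name() + " = " + first.value(i));
        }
    }
}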
From source file:edu.oregonstate.eecs.mcplan.ml.WekaGlue.java
License:Open Source License
public static double[] toDoubleArray(final Instance inst) {
    final double[] v = new double[inst.numAttributes()];
    for (int i = 0; i < inst.numAttributes(); ++i) {
        v[i] = inst.value(i);
    }
    return v;
}
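A hypothetical call site for the helper above, where data stands for any loaded Instances object:

// Hypothetical usage: flatten the first instance into a primitive array.
double[] features = WekaGlue.toDoubleArray(data.instance(0));
System.out.println(java.util.Arrays.toString(features));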
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Converts an instance to a feature vector excluding the class attribute.
 * @param instance the instance
 * @return a vector representation of the instance excluding the class attribute
 */
public static double[] instanceToDoubleArray(Instance instance) {
    double[] vector = new double[(instance.classIndex() != -1) ? instance.numAttributes() - 1
            : instance.numAttributes()];
    double[] instanceDoubleArray = instance.toDoubleArray();
    int attIdx = 0;
    for (int i = 0; i < vector.length; i++) {
        if (i == instance.classIndex()) {
            attIdx++;
        }
        vector[i] = instanceDoubleArray[attIdx++];
    }
    return vector;
}
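A hypothetical call site, showing that the class column is dropped once a class index is set:

// Hypothetical usage: with the last attribute as class, the returned
// vector has numAttributes() - 1 entries.
data.setClassIndex(data.numAttributes() - 1);
double[] features = WekaUtils.instanceToDoubleArray(data.instance(0));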
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Converts a single instance to SVM-light format.
 * @param data the Weka instance
 * @param labelFormat the SVM-light label format
 * @return the instance in SVM-light format
 */
public static String arffToSVMLight(Instance data, SVMLightLabelFormat labelFormat) {
    if (labelFormat == SVMLightLabelFormat.CLASSIFICATION && data.numClasses() != 2) {
        throw new IllegalArgumentException(
                "SVM-light classification label format requires that the data contain only two classes.");
    }

    String str = "";
    String endline = System.getProperty("line.separator");

    int numAttributes = data.numAttributes();
    int classAttIdx = data.classIndex();

    // convert the instance label
    if (labelFormat == SVMLightLabelFormat.CLASSIFICATION) {
        str += (data.classValue() == 0) ? "-1" : "1";
    } else {
        str += data.classValue();
    }
    str += " ";

    // convert each feature
    for (int attIdx = 0; attIdx < numAttributes; attIdx++) {
        // skip the class attribute
        if (attIdx == classAttIdx)
            continue;
        str += (attIdx + 1) + ":" + data.value(attIdx) + " ";
    }

    // append the instance info string
    str += "#";
    str += endline;

    return str;
}
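A hypothetical driver that converts a whole dataset one instance at a time; it assumes the class index is already set (and binary for classification), and that SVMLightLabelFormat is imported from the same utility package:

// Hypothetical usage: each call emits one SVM-light record.
StringBuilder sb = new StringBuilder();
for (int i = 0; i < data.numInstances(); i++) {
    sb.append(WekaUtils.arffToSVMLight(data.instance(i), SVMLightLabelFormat.CLASSIFICATION));
}
String svmLightData = sb.toString();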
From source file:es.jarias.FMC.ClassCompoundTransformation.java
License:Open Source License
/**
 * @param instance the instance to transform
 * @param labelIndices the indices of the label attributes
 * @return the transformed instance
 * @throws Exception if the transformation fails
 */
public Instance transformInstance(Instance instance, int[] labelIndices) throws Exception {
    Instance transformedInstance = RemoveAllLabels.transformInstance(instance, labelIndices);
    transformedInstance.setDataset(null);
    transformedInstance.insertAttributeAt(transformedInstance.numAttributes());
    transformedInstance.setDataset(transformedFormat);
    return transformedInstance;
}
From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java
License:Open Source License
/**
 * @param folds
 * @param minAgents
 * @param maxAgents
 * @param originalDatasetPath
 * @param outputDir
 * @param scenario
 * @param logger
 */
public void splitDataset(int folds, int minAgents, int maxAgents, String originalDatasetPath,
        String outputDir, String scenario, Logger logger) {

    int ratioint = (int) ((1 / (double) folds) * 100);
    double roundedratio = ((double) ratioint) / 100;

    // Look for essentials
    List<String[]> essentials = this.getEssentials(originalDatasetPath, logger);

    for (int fold = 0; fold < folds; fold++) {
        String outputDirWithRatio = outputDir + "/" + roundedratio + "testRatio/iteration-" + fold;
        File dir = new File(outputDirWithRatio);
        if (!dir.exists() || !dir.isDirectory()) {
            dir.mkdirs();
        }

        logger.finer("--> splitDataset()");
        logger.fine("Creating experiment.info...");

        try {
            Instances originalData = this.getDataFromCSV(originalDatasetPath);
            originalData.randomize(new Random());
            originalData.stratify(folds);

            // TestDataSet
            Instances testData = originalData.testCV(folds, fold);
            CSVSaver saver = new CSVSaver();
            ArffSaver arffsaver = new ArffSaver();
            File file = new File(outputDirWithRatio + File.separator + "test-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(testData);
                saver.setFile(file);
                saver.writeBatch();
            }

            file = new File(outputDirWithRatio + File.separator + "test-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(testData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
            }

            // BayesCentralDataset
            Instances trainData = originalData.trainCV(folds, fold);
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(trainData);
                saver.setFile(file);
                saver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file, new File(
                        outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.csv"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(trainData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file, new File(
                        outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.arff"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            // Agent datasets
            CsvReader csvreader = new CsvReader(new FileReader(new File(originalDatasetPath)));
            csvreader.readHeaders();
            String[] headers = csvreader.getHeaders();
            csvreader.close();

            for (int agents = minAgents; agents <= maxAgents; agents++) {
                this.createExperimentInfoFile(folds, agents, originalDatasetPath, outputDirWithRatio,
                        scenario, logger);
                HashMap<String, CsvWriter> writers = new HashMap<String, CsvWriter>();
                String agentsDatasetsDir = outputDirWithRatio + File.separator + agents + "agents";
                HashMap<String, CsvWriter> arffWriters = new HashMap<String, CsvWriter>();
                File f = new File(agentsDatasetsDir);
                if (!f.isDirectory()) {
                    f.mkdirs();
                }
                Instances copy = new Instances(trainData);
                copy.delete();
                for (int i = 0; i < agents; i++) {
                    String fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.csv";
                    file = new File(fileName);
                    if (!file.exists()) {
                        CsvWriter writer = new CsvWriter(new FileWriter(fileName), ',');
                        writer.writeRecord(headers);
                        writers.put("AGENT" + i, writer);
                    }
                    fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.arff";
                    file = new File(fileName);
                    if (!file.exists()) {
                        arffsaver.resetOptions();
                        arffsaver.setInstances(copy);
                        arffsaver.setFile(new File(fileName));
                        arffsaver.writeBatch();
                        CsvWriter arffwriter = new CsvWriter(new FileWriter(fileName, true), ',');
                        arffWriters.put("AGENT" + i, arffwriter);
                    }
                    logger.fine("AGENT" + i + " dataset created in csv and arff formats.");
                }

                // Append essentials to all
                for (String[] essential : essentials) {
                    for (CsvWriter wr : writers.values()) {
                        wr.writeRecord(essential);
                    }
                    for (CsvWriter arffwr : arffWriters.values()) {
                        arffwr.writeRecord(essential);
                    }
                }

                int agentCounter = 0;
                for (int j = 0; j < trainData.numInstances(); j++) {
                    Instance instance = trainData.instance(j);
                    CsvWriter writer = writers.get("AGENT" + agentCounter);
                    CsvWriter arffwriter = arffWriters.get("AGENT" + agentCounter);
                    String[] row = new String[instance.numAttributes()];
                    for (int a = 0; a < instance.numAttributes(); a++) {
                        row[a] = instance.stringValue(a);
                    }
                    if (writer != null) {
                        writer.writeRecord(row);
                    }
                    if (arffwriter != null) {
                        arffwriter.writeRecord(row);
                    }
                    agentCounter++;
                    if (agentCounter == agents) {
                        agentCounter = 0;
                    }
                }

                for (CsvWriter wr : writers.values()) {
                    wr.close();
                }
                for (CsvWriter arffwr : arffWriters.values()) {
                    arffwr.close();
                }
            }
        } catch (Exception e) {
            logger.severe("Exception while splitting dataset. ->");
            logger.severe(e.getMessage());
            System.exit(1);
        }

        logger.finest("Dataset for fold " + fold + " created.");
    }

    logger.finer("<-- splitDataset()");
}
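A hypothetical invocation of the splitter above; the paths, scenario name, and logger name are placeholders:

// Hypothetical usage: 10 folds, agent counts from 2 to 5.
DatasetSplitter splitter = new DatasetSplitter();
splitter.splitDataset(10, 2, 5, "data/original-dataset.csv", "output", "scenario-1",
        Logger.getLogger("DatasetSplitter"));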
From source file:etc.aloe.filters.AbstractRegexFilter.java
License:Open Source License
@Override
protected Instance process(Instance instance) throws Exception {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute not set");
    }

    String stringValue = instance.stringValue(stringAttributeIndex);
    NamedRegex[] regexFeatures = getRegexFeatures();

    int numOldValues = instance.numAttributes();
    int numNewFeatures = regexFeatures.length;
    if (countRegexLengths) {
        numNewFeatures = regexFeatures.length * 2;
    }
    double[] newValues = new double[numOldValues + numNewFeatures];

    // Copy all attributes from input to output
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (getInputFormat().attribute(i).type() != Attribute.STRING) {
            // Add simple nominal and numeric attributes directly
            if (instance.value(i) != 0.0) {
                newValues[i] = instance.value(i);
            }
        } else {
            if (instance.isMissing(i)) {
                newValues[i] = Utils.missingValue();
            } else {
                // If this is a string attribute, we have to first add
                // this value to the range of possible values, then add
                // its new internal index.
                if (outputFormatPeek().attribute(i).numValues() == 0) {
                    // Note that the first string value in a
                    // SparseInstance doesn't get printed.
                    outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug");
                }
                int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i));
                newValues[i] = newIndex;
            }
        }
    }

    for (int i = 0; i < regexFeatures.length; i++) {
        Pattern pattern = regexFeatures[i].getPattern();
        Matcher matches = pattern.matcher(stringValue);
        int count = 0;
        int maxLength = 0;
        while (matches.find()) {
            count++;
            int len = matches.group().length();
            if (len > maxLength) {
                maxLength = len;
            }
        }
        int index = numOldValues + i;
        if (countRegexLengths) {
            index = numOldValues + 2 * i;
        }
        newValues[index] = count;
        if (countRegexLengths) {
            newValues[numOldValues + 2 * i + 1] = maxLength;
        }
    }

    Instance result = new SparseInstance(instance.weight(), newValues);
    return result;
}
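Filters like this are normally applied through Weka's standard filter pipeline rather than by calling process() directly; a minimal sketch, where MyRegexFilter stands for some hypothetical concrete subclass:

// Hypothetical usage via weka.filters.Filter.useFilter.
AbstractRegexFilter filter = new MyRegexFilter(); // placeholder subclass
filter.setInputFormat(data);
Instances transformed = Filter.useFilter(data, filter);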
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
/**
 * Converts the instance without normalization.
 *
 * @param instance the instance to convert
 * @param converted the list that receives the converted instance
 * @return the document length
 */
private double convertInstancewoDocNorm(Instance instance, ArrayList<Instance> converted) {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute index not valid");
    }

    int numOldValues = instance.numAttributes();
    double[] newValues = new double[numOldValues + m_selectedTerms.size()];

    // Copy all attributes from input to output
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (getInputFormat().attribute(i).type() != Attribute.STRING) {
            // Add simple nominal and numeric attributes directly
            if (instance.value(i) != 0.0) {
                newValues[i] = instance.value(i);
            }
        } else {
            if (instance.isMissing(i)) {
                newValues[i] = Utils.missingValue();
            } else {
                // If this is a string attribute, we have to first add
                // this value to the range of possible values, then add
                // its new internal index.
                if (outputFormatPeek().attribute(i).numValues() == 0) {
                    // Note that the first string value in a
                    // SparseInstance doesn't get printed.
                    outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug");
                }
                int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i));
                newValues[i] = newIndex;
            }
        }
    }

    String stringValue = instance.stringValue(stringAttributeIndex);

    double docLength = 0;
    HashMap<String, Integer> termMatches = m_selectedTermsTrie.countNonoverlappingMatches(stringValue);
    for (Map.Entry<String, Integer> entry : termMatches.entrySet()) {
        String term = entry.getKey();
        int termIdx = m_selectedTermIndices.get(term);
        double matches = entry.getValue();
        if (!m_OutputCounts && matches > 0) {
            matches = 1;
        }
        if (matches > 0) {
            if (m_TFTransform == true) {
                matches = Math.log(matches + 1);
            }
            if (m_IDFTransform == true) {
                matches = matches * Math.log(m_NumInstances / (double) m_DocsCounts[termIdx]);
            }
            newValues[numOldValues + termIdx] = matches;
            docLength += matches * matches;
        }
    }

    Instance result = new SparseInstance(instance.weight(), newValues);
    converted.add(result);
    return Math.sqrt(docLength);
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
/**
 * Normalizes the given instance to the average document length (only the
 * newly constructed attributes).
 *
 * @param inst the instance to normalize
 * @param docLength the document length
 * @throws Exception if the average document length is not set
 */
private void normalizeInstance(Instance inst, double docLength) throws Exception {
    if (docLength == 0) {
        return;
    }

    int numOldValues = getInputFormat().numAttributes();

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Normalize document vector
    for (int j = numOldValues; j < inst.numAttributes(); j++) {
        double val = inst.value(j) * m_AvgDocLength / docLength;
        inst.setValue(j, val);
    }
}
From source file:etc.aloe.filters.WordFeaturesExtractor.java
License:Open Source License
@Override
protected Instance process(Instance instance) throws Exception {
    if (selectedAttributeIndex < 0) {
        throw new IllegalStateException("String attribute not set");
    }

    int numOldValues = instance.numAttributes();
    int numNewFeatures = unigrams.size() + bigrams.size();
    double[] newValues = new double[numOldValues + numNewFeatures];

    // Copy all attributes from input to output
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (getInputFormat().attribute(i).type() != Attribute.STRING) {
            // Add simple nominal and numeric attributes directly
            if (instance.value(i) != 0.0) {
                newValues[i] = instance.value(i);
            }
        } else {
            if (instance.isMissing(i)) {
                newValues[i] = Utils.missingValue();
            } else {
                // If this is a string attribute, we have to first add
                // this value to the range of possible values, then add
                // its new internal index.
                if (outputFormatPeek().attribute(i).numValues() == 0) {
                    // Note that the first string value in a
                    // SparseInstance doesn't get printed.
                    outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug");
                }
                int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i));
                newValues[i] = newIndex;
            }
        }
    }

    String stringValue = instance.stringValue(selectedAttributeIndex);

    if (instance.isMissing(selectedAttributeIndex) == false) {
        List<String> words = tokenizeDocument(instance);
        Set<String> wordSet = new HashSet<String>(words);

        for (int i = 0; i < unigrams.size(); i++) {
            String unigram = unigrams.get(i);
            int count = 0;
            if (wordSet.contains(unigram)) {
                // Count the times the word is in the document
                for (int w = 0; w < words.size(); w++) {
                    if (words.get(w).equals(unigram)) {
                        count += 1;
                    }
                }
            }
            int featureIndex = numOldValues + i;
            newValues[featureIndex] = count;
        }

        for (int i = 0; i < bigrams.size(); i++) {
            Bigram bigram = bigrams.get(i);
            int count = bigram.getTimesInDocument(words);
            int featureIndex = numOldValues + unigrams.size() + i;
            newValues[featureIndex] = count;
        }
    }

    Instance result = new SparseInstance(instance.weight(), newValues);
    return result;
}
From source file:expshell.NeuralClass.java
public static void main(String[] args) throws Exception {
    Instance testInst = new Instance(5);
    testInst.setValue(0, 2.1);
    testInst.setValue(1, 3.1);
    testInst.setValue(2, 4.1);
    testInst.setValue(3, 5.1);
    // the class
    testInst.setValue(4, 0.0);
    Layer l = new Layer(5, testInst.numAttributes() - 1);
    l.computeNode(testInst);
}
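For reference, the snippet above targets the pre-3.7 Weka API, where Instance was a concrete class. From Weka 3.7 on, Instance is an interface and the equivalent construction uses DenseInstance; a sketch with the same attribute layout:

import weka.core.DenseInstance;
import weka.core.Instance;

// Weka 3.7+ equivalent of new Instance(5).
Instance testInst = new DenseInstance(5);
testInst.setValue(0, 2.1);
testInst.setValue(1, 3.1);
testInst.setValue(2, 4.1);
testInst.setValue(3, 5.1);
testInst.setValue(4, 0.0); // the class value
int numInputs = testInst.numAttributes() - 1; // exclude the class attribute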