List of usage examples for the weka.core.Instance method attribute(int index)
/**
 * Returns the {@link Attribute} at position {@code index}.
 *
 * @param index the position of the requested attribute
 *              (NOTE(review): presumably 0-based — confirm against the Weka API docs)
 * @return the attribute found at that position
 */
public Attribute attribute(int index);
From source file:dkpro.similarity.experiments.sts2013baseline.util.Evaluator.java
License:Open Source License
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception { for (Dataset dataset : datasets) { // Set parameters int folds = 10; Classifier baseClassifier = new LinearRegression(); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff" }); String location = MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff"; Instances data = DataSource.read(location); if (data == null) { throw new IOException("Could not load data from: " + location); }//from ww w . j a v a2 s. c o m data.setClassIndex(data.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data data.randomize(random); // Perform cross-validation Instances predictedData = null; Evaluation eval = new Evaluation(data); for (int n = 0; n < folds; n++) { Instances train = data.trainCV(folds, n, random); Instances test = data.testCV(folds, n); // Apply log filter Filter logFilter = new LogFilter(); logFilter.setInputFormat(train); train = Filter.useFilter(train, logFilter); logFilter.setInputFormat(test); test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Evaluate eval.evaluateModel(classifier, test); // Add predictions AddClassification filter = new AddClassification(); filter.setClassifier(classifier); 
filter.setOutputClassification(true); filter.setOutputDistribution(false); filter.setOutputErrorFlag(true); filter.setInputFormat(train); Filter.useFilter(train, filter); // trains the classifier Instances pred = Filter.useFilter(test, filter); // performs predictions on test set if (predictedData == null) { predictedData = new Instances(pred, 0); } for (int j = 0; j < pred.numInstances(); j++) { predictedData.add(pred.instance(j)); } } // Prepare output scores double[] scores = new double[predictedData.numInstances()]; for (Instance predInst : predictedData) { int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1; int valueIdx = predictedData.numAttributes() - 2; double value = predInst.value(predInst.attribute(valueIdx)); scores[id] = value; // Limit to interval [0;5] if (scores[id] > 5.0) { scores[id] = 5.0; } if (scores[id] < 0.0) { scores[id] = 0.0; } } // Output StringBuilder sb = new StringBuilder(); for (Double score : scores) { sb.append(score.toString() + LF); } FileUtils.writeStringToFile( new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"), sb.toString()); } }
From source file:edu.illinois.cs.cogcomp.saul.learn.SaulWekaWrapper.java
License:Open Source License
/** * Creates a WEKA Instance object out of a {@link FeatureVector}. **///from ww w .ja v a2 s . c o m private Instance makeInstance(LBJavaInstance instance) { // Initialize an Instance object Instance inst = new Instance(attributeInfo.size()); // Acknowledge that this instance will be a member of our dataset 'wekaInstances' inst.setDataset(wekaInstances); // set all nominal feature values to 0, which means those features are not used in this example for (int i = 1; i < attributeInfo.size(); i++) if (inst.attribute(i).isNominal()) inst.setValue(i, "0"); // Assign values for its attributes /* * Since we are iterating through this example's feature list, which does not contain the * label feature (the label feature is the first in the 'attribute' list), we set attIndex * to at exampleFeatures[featureIndices] + 1, while we start featureIndices at 0. */ for (int featureIndex = 0; featureIndex < instance.featureIndices.length; ++featureIndex) { int attIndex = instance.featureIndices[featureIndex] + 1; Feature f = lexicon.lookupKey(instance.featureIndices[featureIndex]); // if the feature does not exist, do nothing. this may occur in test set. if (f == null) continue; Attribute att = (Attribute) attributeInfo.elementAt(attIndex); // make sure the feature and the attribute match if (!(att.name().equals(f.toString()))) { System.err.println( "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair."); System.err.println(" " + att.name() + " and " + f.toString() + " should have been identical."); new Exception().printStackTrace(); System.exit(1); } if (f.isDiscrete()) inst.setValue(attIndex, "1"); // this feature is used in this example so we set it to "1" else inst.setValue(attIndex, instance.featureValues[featureIndex]); } /* * Here, we assume that if either the labels FeatureVector is empty of features, or is null, * then this example is to be considered unlabeled. 
*/ if (instance.labelIndices.length == 0) { inst.setClassMissing(); } else if (instance.labelIndices.length > 1) { System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, "); new Exception().printStackTrace(); System.exit(1); } else { Feature label = labelLexicon.lookupKey(instance.labelIndices[0]); // make sure the label feature matches the n 0'th attribute if (!(label.getGeneratingClassifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) { System.err.println("WekaWrapper: Error - makeInstance found the wrong label name."); new Exception().printStackTrace(); System.exit(1); } if (!label.isDiscrete()) inst.setValue(0, instance.labelValues[0]); else inst.setValue(0, label.getStringValue()); } return inst; }
From source file:en_deep.mlprocess.manipulation.featmodif.FeatureModifierFilter.java
License:Open Source License
/** * Convert a single instance over if the class is nominal. The converted * instance is added to the end of the output queue. * * @param instance the instance to convert *///from w ww .java2 s. com private void convertInstance(Instance instance) { double[] vals = new double[outputFormatPeek().numAttributes()]; String[] stringVals = new String[vals.length]; int attSoFar = 0; for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = instance.attribute(j); if (!m_Columns.isInRange(j)) { vals[attSoFar] = instance.value(j); attSoFar++; } else { // store new string values, make double values "missing" for now (if some string // values are missing, the double values will remain missing) if (instance.value(0) == 12 && instance.value(1) == 9 && att.name().equals("sempos")) { attSoFar = attSoFar; } attSoFar += getAttributeOutputValue(att, instance.value(j), vals, stringVals, attSoFar); } } Instance inst = null; if (instance instanceof SparseInstance) { inst = new SparseInstance(instance.weight(), vals); } else { inst = new DenseInstance(instance.weight(), vals); } inst.setDataset(getOutputFormat()); copyValues(inst, false, instance.dataset(), getOutputFormat()); // add new string values to the output data set and to the instance for (int i = 0; i < stringVals.length; ++i) { if (stringVals[i] != null) { vals[i] = inst.dataset().attribute(i).addStringValue(stringVals[i]); } } inst.replaceMissingValues(vals); inst.setDataset(getOutputFormat()); push(inst); }
From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java
License:Open Source License
/** * Convert a single instance over if the class is nominal. The converted * instance is added to the end of the output queue. * * @param instance the instance to convert *//* www. jav a2s . c o m*/ private void convertInstance(Instance instance) { // create a copy of the input instance Instance inst = null; if (instance instanceof SparseInstance) { inst = new SparseInstance(instance.weight(), instance.toDoubleArray()); } else { inst = new DenseInstance(instance.weight(), instance.toDoubleArray()); } // copy the string values from this instance as well (only the existing ones) inst.setDataset(getOutputFormat()); copyValues(inst, false, instance.dataset(), getOutputFormat()); // beware of weird behavior of this function (see source)!! inst.setDataset(getOutputFormat()); // find the missing values to be filled + the double values for the new "missing" label and store it double[] vals = instance.toDoubleArray(); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = instance.attribute(j); if (m_Columns.isInRange(j) && instance.isMissing(j)) { // find the "missing" value in the output nominal attribute if (att.isNominal()) { vals[j] = inst.dataset().attribute(j).indexOfValue(m_ReplVal); } // add a string value for the new "missing" label else if (att.isString()) { vals[j] = inst.dataset().attribute(j).addStringValue(m_ReplVal); } } } // fill in the missing values found inst.replaceMissingValues(vals); push(inst); }
From source file:irisdriver.IrisDriver.java
/** * @param args the command line arguments *//* w w w . j a va2 s .c om*/ public static void main(String[] args) { //As an example of arguments: sepallength=5.1 sepalwidth=3.5 petallength=1.4 petalwidth=0.2 try { Hashtable<String, String> values = new Hashtable<String, String>(); /*Iris irisModel = new Iris(); for(int i = 0; i < args.length; i++) { String[] tokens = args[i].split("="); values.put(tokens[0], tokens[1]); } System.out.println("Classification: " + irisModel.classifySpecies(values));*/ //Loading the model String pathModel = ""; String pathTestSet = ""; JFileChooser chooserModel = new JFileChooser(); chooserModel.setCurrentDirectory(new java.io.File(".")); chooserModel.setDialogTitle("Choose the model"); chooserModel.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES); chooserModel.setAcceptAllFileFilterUsed(true); if (chooserModel.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) { File filePathModel = chooserModel.getSelectedFile(); pathModel = filePathModel.getPath(); Iris irisModel = new Iris(pathModel); //Loading the model JFileChooser chooserTestSet = new JFileChooser(); chooserTestSet.setDialogTitle("Choose TEST SET"); chooserTestSet.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES); chooserTestSet.setAcceptAllFileFilterUsed(true); //Loading the testing dataset if (chooserTestSet.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) { File filePathTestSet = chooserTestSet.getSelectedFile(); pathTestSet = filePathTestSet.getPath(); //WRITTING THE OUTPUT: BufferedWriter writer = new BufferedWriter(new FileWriter("D:\\output_file.txt")); //Transforming the data set into pairs attribute-value ConverterUtils.DataSource unlabeledSource = new ConverterUtils.DataSource(pathTestSet); Instances unlabeledData = unlabeledSource.getDataSet(); if (unlabeledData.classIndex() == -1) { unlabeledData.setClassIndex(unlabeledData.numAttributes() - 1); } for (int i = 0; i < unlabeledData.numInstances(); i++) { Instance ins = unlabeledData.instance(i); 
//ins.numAttributes()-1 --> not to include the label for (int j = 0; j < ins.numAttributes() - 1; j++) { String attrib = ins.attribute(j).name(); double val = ins.value(ins.attribute(j)); values.put(attrib, String.valueOf(val)); } String predictedLabel = irisModel.classifySpecies(values); System.out.println("Classification: " + predictedLabel); values.clear(); //Writting the results in a txt writer.write("The label is: " + predictedLabel); //writer.newLine(); //writers.write("The error rate of the prediction is : " + eval.errorRate()); //writer.newLine(); } writer.flush(); writer.close(); } } } catch (Exception ex) { Logger.getLogger(IrisDriver.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:j48.BinC45Split.java
License:Open Source License
/**
 * Determines which of the two subsets of this binary split the instance falls into.
 *
 * <p>For a nominal split attribute the instance goes to subset 0 when its value index
 * equals the (integer-truncated) split point; for any other attribute it goes to subset 0
 * when its value is smaller than or equal to the split point. Everything else goes to
 * subset 1.
 *
 * @param instance the instance to assign
 * @return 0 or 1, or -1 if the instance's value for the split attribute is missing
 * @exception Exception if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {
    if (instance.isMissing(m_attIndex)) {
        return -1;
    }

    final boolean inFirstSubset;
    if (instance.attribute(m_attIndex).isNominal()) {
        inFirstSubset = (int) m_splitPoint == (int) instance.value(m_attIndex);
    } else {
        inFirstSubset = Utils.smOrEq(instance.value(m_attIndex), m_splitPoint);
    }
    return inFirstSubset ? 0 : 1;
}
From source file:j48.C45Split.java
License:Open Source License
/** * Returns index of subset instance is assigned to. Returns -1 if instance * is assigned to more than one subset./*from w ww . j av a2s . com*/ * * @exception Exception * if something goes wrong */ public final int whichSubset(Instance instance) throws Exception { if (instance.isMissing(m_attIndex)) return -1; else { if (instance.attribute(m_attIndex).isNominal()) return (int) instance.value(m_attIndex); else if (Utils.smOrEq(instance.value(m_attIndex), m_splitPoint)) return 0; else return 1; } }
From source file:j48.GraftSplit.java
License:Open Source License
/** * @param instance the instance for which to determine the subset * @return an int indicating the subset this instance belongs to */// w ww . ja v a 2 s . com public int whichSubset(Instance instance) { if (instance.isMissing(m_attIndex)) return -1; if (instance.attribute(m_attIndex).isNominal()) { // in the case of nominal, m_splitPoint is the = value, all else is != if (instance.value(m_attIndex) == m_splitPoint) return 0; else return 1; } else { if (Utils.smOrEq(instance.value(m_attIndex), m_splitPoint)) return 0; else return 1; } }
From source file:kea.KEAPhraseFilter.java
License:Open Source License
/** * Converts an instance by removing all non-alphanumeric characters * from its string attribute values./*from www . j ava2 s . co m*/ */ private void convertInstance(Instance instance) throws Exception { double[] instVals = new double[instance.numAttributes()]; for (int i = 0; i < instance.numAttributes(); i++) { if (!instance.attribute(i).isString() || instance.isMissing(i)) { instVals[i] = instance.value(i); } else { if (!m_SelectCols.isInRange(i)) { int index = getOutputFormat().attribute(i).addStringValue(instance.stringValue(i)); instVals[i] = (double) index; continue; } String str = instance.stringValue(i); StringBuffer resultStr = new StringBuffer(); int j = 0; boolean phraseStart = true; boolean seenNewLine = false; boolean haveSeenHyphen = false; boolean haveSeenSlash = false; while (j < str.length()) { boolean isWord = false; boolean potNumber = false; int startj = j; while (j < str.length()) { char ch = str.charAt(j); if (Character.isLetterOrDigit(ch)) { potNumber = true; if (Character.isLetter(ch)) { isWord = true; } j++; } else if ((!m_DisallowInternalPeriods && (ch == '.')) || (ch == '@') || (ch == '_') || (ch == '&') || (ch == '/') || (ch == '-')) { if ((j > 0) && (j + 1 < str.length()) && Character.isLetterOrDigit(str.charAt(j - 1)) && Character.isLetterOrDigit(str.charAt(j + 1))) { j++; } else { break; } } else if (ch == '\'') { if ((j > 0) && Character.isLetterOrDigit(str.charAt(j - 1))) { j++; } else { break; } } else { break; } } if (isWord == true) { if (!phraseStart) { if (haveSeenHyphen) { resultStr.append('-'); } else if (haveSeenSlash) { resultStr.append('/'); } else { resultStr.append(' '); } } resultStr.append(str.substring(startj, j)); if (j == str.length()) { break; } phraseStart = false; seenNewLine = false; haveSeenHyphen = false; haveSeenSlash = false; if (Character.isWhitespace(str.charAt(j))) { if (str.charAt(j) == '\n') { seenNewLine = true; } } else if (str.charAt(j) == '-') { haveSeenHyphen = true; } else if (str.charAt(j) == 
'/') { haveSeenSlash = true; } else { phraseStart = true; resultStr.append('\n'); } j++; } else if (j == str.length()) { break; } else if (str.charAt(j) == '\n') { if (seenNewLine) { if (phraseStart == false) { resultStr.append('\n'); phraseStart = true; } } else if (potNumber) { if (phraseStart == false) { phraseStart = true; resultStr.append('\n'); } } seenNewLine = true; j++; } else if (Character.isWhitespace(str.charAt(j))) { if (potNumber) { if (phraseStart == false) { phraseStart = true; resultStr.append('\n'); } } j++; } else { if (phraseStart == false) { resultStr.append('\n'); phraseStart = true; } j++; } } int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString()); instVals[i] = (double) index; } } Instance inst = new Instance(instance.weight(), instVals); inst.setDataset(getOutputFormat()); push(inst); }
From source file:kea.NumbersFilter.java
License:Open Source License
/** * Converts an instance. A phrase boundary is inserted where * a number is found.//from w w w .jav a 2 s. c o m */ private void convertInstance(Instance instance) throws Exception { double[] instVals = new double[instance.numAttributes()]; for (int i = 0; i < instance.numAttributes(); i++) { if ((!instance.attribute(i).isString()) || instance.isMissing(i)) { instVals[i] = instance.value(i); } else { String str = instance.stringValue(i); StringBuffer resultStr = new StringBuffer(); StringTokenizer tok = new StringTokenizer(str, " \t\n", true); while (tok.hasMoreTokens()) { String token = tok.nextToken(); // Everything that doesn't contain at least // one letter is considered to be a number boolean isNumber = true; for (int j = 0; j < token.length(); j++) { if (Character.isLetter(token.charAt(j))) { isNumber = false; break; } } if (!isNumber) { resultStr.append(token); } else { if (token.equals(" ") || token.equals("\t") || token.equals("\n")) { resultStr.append(token); } else { resultStr.append(" \n "); } } } int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString()); instVals[i] = (double) index; } } Instance inst = new Instance(instance.weight(), instVals); inst.setDataset(getOutputFormat()); push(inst); }