List of usage examples for weka.core.Instances.classIndex()
public int classIndex()
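Every example below follows the same contract: classIndex() returns the zero-based index of the class attribute, or -1 if no class attribute has been set. A minimal sketch of that behavior (the ARFF file name is an assumption, not taken from any example below):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassIndexDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new DataSource("iris.arff").getDataSet(); // file name assumed
        System.out.println(data.classIndex());                     // -1: no class attribute set yet
        data.setClassIndex(data.numAttributes() - 1);               // conventionally the last attribute
        System.out.println(data.classIndex());                     // now numAttributes() - 1
        System.out.println(data.classAttribute().name());
    }
}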
From source file:entity.NoiseInjectionManager.java
License:Open Source License
/**
 * Increments fp and fn by the specified percentages.
 * Randomizes the order of instances and modifies instances until the noise quota is reached,
 * then randomizes the instances again.
 * NOTE: it modifies the given dataset, because it is passed by reference.
 *
 * @param origDataset
 * @param fpPercentage
 * @param fnPercentage
 * @return Instances noisyDataset
 */
public Instances addNoiseToDataset(Instances origDataset, BigDecimal fpPercentage, BigDecimal fnPercentage) {
    // exit if no noise must be added
    if (fnPercentage.equals(BigDecimal.ZERO) && fpPercentage.equals(BigDecimal.ZERO)) {
        if (verbose)
            System.out.println("[NoiseManager , addNoiseToDataset] no noise to add");
        return origDataset;
    }
    // total instances in dataset
    int numInstances = origDataset.numInstances();
    // count positive (buggy) and negative (non-buggy) instances
    int numOfPositives = 0;
    int numOfNegatives = 0;
    for (int j = 0; j < numInstances; j++) {
        if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) {
            numOfPositives++;
        }
        // this is a redundant check, but better safe than sorry
        else if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel)) {
            numOfNegatives++;
        }
    }
    // calculate the number of false positives to insert
    int fpToInsert = (int) Math.round(numOfNegatives * fpPercentage.doubleValue() / 100);
    int fpInserted = 0;
    if (verbose)
        System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpToInsert= " + fpToInsert + ", totInstances= "
                + origDataset.numInstances() + " true negatives= " + numOfNegatives + " %fp= " + fpPercentage);
    // calculate the number of false negatives to insert
    int fnToInsert = (int) Math.round(numOfPositives * fnPercentage.doubleValue() / 100);
    int fnInserted = 0;
    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] fnToInsert= " + fnToInsert + ", totInstances= "
                + origDataset.numInstances() + " true positives= " + numOfPositives + " %fn= " + fnPercentage);
    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel
                + " - nonbuggy label: " + Settings.nonbuggyLabel);
    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    for (int i = 0; i < origDataset.numInstances(); i++) {
        if (verbose)
            System.out.print("\nORIGINAL VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
        // get the class attribute (it HAS to be the last one)
        Attribute att = origDataset.instance(i).attribute(origDataset.classIndex());
        // if there are fn left to add and this is a positive instance, turn it into a negative, making it a fn
        if ((fnInserted < fnToInsert)
                && (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel))) {
            origDataset.instance(i).setValue(att, Settings.nonbuggyLabel);
            fnInserted++;
            if (verbose)
                System.out.print(" - added FN, added " + fnInserted + " of " + fnToInsert + " ");
        }
        // if there are fp left to add and this is a negative instance, turn it into a positive, making it a fp
        else if ((fpInserted < fpToInsert)
                && (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel))) {
            origDataset.instance(i).setValue(att, Settings.buggyLabel);
            fpInserted++;
            if (verbose)
                System.out.print(" - added FP, added " + fpInserted + " of " + fpToInsert + " ");
        }
        if (verbose)
            System.out.print(" FINAL ELEMENT VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
    }
    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    return origDataset;
}
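Because this method mutates the dataset it receives, a call site would typically load its own copy first. A hypothetical usage sketch (the no-arg NoiseInjectionManager constructor and the file name are assumptions, not shown in the source above):

// inject 10% false positives and 5% false negatives (percentages assumed for illustration)
Instances data = new weka.core.converters.ConverterUtils.DataSource("defects.arff").getDataSet(); // file name assumed
data.setClassIndex(data.numAttributes() - 1); // class attribute must be last, per the comment above
NoiseInjectionManager manager = new NoiseInjectionManager(); // constructor signature assumed
Instances noisy = manager.addNoiseToDataset(data, new BigDecimal("10"), new BigDecimal("5"));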
From source file:entity.NoiseInjectionManager.java
License:Open Source License
/**
 * Increments fp and fn in combination by a specified percentage.
 * Randomizes the order of instances and modifies instances until the noise quota is reached,
 * then randomizes the instances again.
 * NOTE: it modifies the given dataset, because it is passed by reference.
 *
 * @param origDataset
 * @param combinedFpFnPercentage
 * @return noisyData
 */
public Instances addNoiseToDataset(Instances origDataset, BigDecimal combinedFpFnPercentage) {
    // exit if no noise must be added
    if (combinedFpFnPercentage.equals(BigDecimal.ZERO)) {
        if (verbose)
            System.out.println("[NoiseManager , addNoiseToDataset] no noise to add");
        return origDataset;
    }
    // total instances in dataset
    int numInstances = origDataset.numInstances();
    // number of instances to flip, fp and fn combined
    int fpAndFnToInsert = (int) Math.round(numInstances * combinedFpFnPercentage.doubleValue() / 100);
    int fpAndFnInserted = 0;
    if (verbose)
        System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpAndFnToInsert= " + fpAndFnToInsert
                + ", totInstances= " + origDataset.numInstances());
    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel
                + " - nonbuggy label: " + Settings.nonbuggyLabel);
    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    for (int i = 0; i < origDataset.numInstances(); i++) {
        if (verbose)
            System.out.print("\nORIGINAL VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
        // get the class attribute (it HAS to be the last one)
        Attribute att = origDataset.instance(i).attribute(origDataset.classIndex());
        // if there are fn or fp left to add
        if (fpAndFnInserted < fpAndFnToInsert) {
            // if this is a positive instance, turn it into a negative, making it a fn
            if (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) {
                if (verbose)
                    System.out.print(" - added FN, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " ");
                origDataset.instance(i).setValue(att, Settings.nonbuggyLabel);
                fpAndFnInserted++;
            }
            // if this is a negative instance, turn it into a positive, making it a fp
            else if (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel)) {
                if (verbose)
                    System.out.print(" - added FP, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " ");
                origDataset.instance(i).setValue(att, Settings.buggyLabel);
                fpAndFnInserted++;
            }
        }
        if (verbose)
            System.out.print(" FINAL ELEMENT VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
    }
    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    return origDataset;
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
private int[] determineDictionary(Instances instances) {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute index not valid");
    }
    // Operate on a per-class basis if the class attribute is set
    int classInd = instances.classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = instances.attribute(classInd).numValues();
    }
    HashMap<String, Integer> termIndices = new HashMap<String, Integer>();
    for (int i = 0; i < termList.size(); i++) {
        termIndices.put(termList.get(i), i);
    }
    // Create the trie for matching terms
    Trie termTrie = new Trie(termList);
    // Initialize the dictionary/count map, one per class value
    ArrayList<HashMap<Integer, Count>> termCounts = new ArrayList<HashMap<Integer, Count>>();
    for (int z = 0; z < values; z++) {
        termCounts.add(new HashMap<Integer, Count>());
    }
    // Go through all the instances and count the emoticons
    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }
        // Get the string attribute to examine
        String stringValue = instance.stringValue(stringAttributeIndex);
        HashMap<Integer, Count> termCountsForClass = termCounts.get(vInd);
        HashMap<String, Integer> termMatches = termTrie.countNonoverlappingMatches(stringValue);
        for (Map.Entry<String, Integer> entry : termMatches.entrySet()) {
            String term = entry.getKey();
            int termIdx = termIndices.get(term);
            int matches = entry.getValue();
            Count count = termCountsForClass.get(termIdx);
            if (count == null) {
                count = new Count(0);
                termCountsForClass.put(termIdx, count);
            }
            if (matches > 0) {
                count.docCount += 1;
                count.count += matches;
            }
        }
    }
    // Figure out the minimum required word frequency per class
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
        int array[] = new int[termCountsForClass.size()];
        int pos = 0;
        for (Map.Entry<Integer, Count> entry : termCountsForClass.entrySet()) {
            array[pos] = entry.getValue().count;
            pos++;
        }
        // sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }
    // Add the word vector attributes (eliminating duplicates that occur in multiple classes)
    HashSet<String> selectedTerms = new HashSet<String>();
    for (int z = 0; z < values; z++) {
        HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
        for (Map.Entry<Integer, Count> entry : termCountsForClass.entrySet()) {
            int termIndex = entry.getKey();
            String term = termList.get(termIndex);
            Count count = entry.getValue();
            if (count.count >= prune[z]) {
                selectedTerms.add(term);
            }
        }
    }
    // Save the selected terms as a list
    this.m_selectedTerms = new ArrayList<String>(selectedTerms);
    this.m_selectedTermsTrie = new Trie(this.m_selectedTerms);
    this.m_NumInstances = instances.size();
    // Construct the selected-term-to-index map
    this.m_selectedTermIndices = new HashMap<String, Integer>();
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        m_selectedTermIndices.put(m_selectedTerms.get(i), i);
    }
    // Compute document frequencies, organized by selected term index (not original term index)
    int[] docsCounts = new int[m_selectedTerms.size()];
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        String term = m_selectedTerms.get(i);
        int termIndex = termIndices.get(term);
        int docsCount = 0;
        for (int z = 0; z < values; z++) {
            HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
            Count count = termCountsForClass.get(termIndex);
            if (count != null) {
                docsCount += count.docCount;
            }
        }
        docsCounts[i] = docsCount;
    }
    return docsCounts;
}
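The per-class bookkeeping above rests on two calls: classIndex() locates the class attribute, and classValue() returns the class as a zero-based index into its nominal values, which is why it can be used directly as a bucket index. A self-contained sketch of that grouping idea, separated from the dictionary logic (the ARFF file name is an assumption):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class PerClassCounts {
    public static void main(String[] args) throws Exception {
        Instances data = new DataSource("messages.arff").getDataSet(); // file name assumed
        data.setClassIndex(data.numAttributes() - 1);
        // one counter slot per nominal class value, mirroring the termCounts list above
        int[] perClass = new int[data.attribute(data.classIndex()).numValues()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            perClass[(int) inst.classValue()]++; // classValue() is the nominal value's index
        }
        for (int z = 0; z < perClass.length; z++) {
            System.out.println(data.classAttribute().value(z) + ": " + perClass[z]);
        }
    }
}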
From source file:experimentalclassifier.ExperimentalClassifier.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("data/iris.csv");
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    data.randomize(new Random());
    String[] options = weka.core.Utils.splitOptions("-P 30");
    RemovePercentage remove = new RemovePercentage();
    remove.setOptions(options);
    remove.setInputFormat(data);
    Instances train = Filter.useFilter(data, remove);
    remove.setInvertSelection(true);
    remove.setInputFormat(data);
    Instances test = Filter.useFilter(data, remove);
    Classifier classifier = new HardCodedClassifier();
    classifier.buildClassifier(train); // currently, this does nothing
    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(classifier, test);
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}
From source file:expshell.ExpShell.java
/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {
    String file = "C:\\Users\\YH Jonathan Kwok\\Documents\\NetBeansProjects\\ExpShell\\src\\expshell\\iris.csv";
    DataSource source = new DataSource(file);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1)
        data.setClassIndex(data.numAttributes() - 1);
    // randomize it
    data.randomize(new Random(1));
    RemovePercentage rp = new RemovePercentage();
    rp.setPercentage(70);
    rp.setInputFormat(data);
    Instances training = Filter.useFilter(data, rp);
    rp.setInvertSelection(true);
    rp.setInputFormat(data);
    Instances test = Filter.useFilter(data, rp);
    // standardize the data
    Standardize filter = new Standardize();
    filter.setInputFormat(training);
    Instances newTest = Filter.useFilter(test, filter);
    Instances newTraining = Filter.useFilter(training, filter);
    // Part 5 - now it's a knn
    Classifier knn = new NeuralClassifier();
    knn.buildClassifier(newTraining);
    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(knn, newTest);
    System.out.println(eval.toSummaryString("***** Overall results: *****", false));
}
From source file:fantail.core.Tools.java
License:Open Source License
public static int getNumberTargets(Instances data) throws Exception {
    if (data == null) {
        throw new Exception("data can't be null.");
    }
    if (data.numInstances() <= 0) {
        throw new Exception("data can't be empty.");
    }
    if (data.classIndex() < 0) {
        throw new Exception("class index is not set.");
    }
    Instance tempInst = data.instance(0);
    Instances targets = tempInst.relationalValue(data.classIndex());
    return targets.numAttributes();
}
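Here the class attribute is relation-valued: relationalValue(data.classIndex()) returns the nested Instances object stored in the first instance's class slot, and its attribute count is the number of ranking targets. A hypothetical call site (the dataset name and the class attribute being last are assumptions):

// count ranking targets in a dataset whose class attribute is relation-valued
Instances data = new weka.core.converters.ConverterUtils.DataSource("rankings.arff").getDataSet(); // file name assumed
data.setClassIndex(data.numAttributes() - 1); // relation-valued class attribute assumed last
int numTargets = Tools.getNumberTargets(data);
System.out.println("Number of ranking targets: " + numTargets);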
From source file:farm_ads.MyClassifier.java
public Instances readIntances(String URL) throws Exception {
    FarmAds fa = new FarmAds(URL);
    FarmAdsVector fav = new FarmAdsVector();
    fav.writeFile("data\\dataVecto.dat", fa);
    DataSource source = new DataSource("data\\dataVecto.dat");
    Instances instances = source.getDataSet();
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }
    return instances;
}
From source file:farm_ads.MyClassifier.java
public Instances readIntances(String URL, Hashtable att, Hashtable numAtt, String iv) throws Exception {
    FarmAds fa = new FarmAds(att, numAtt, iv, URL);
    FarmAdsVector fav = new FarmAdsVector();
    fav.writeFile("data\\dataVecto.dat", fa);
    DataSource source = new DataSource("data\\dataVecto.dat");
    Instances instances = source.getDataSet();
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }
    return instances;
}
From source file:farm_ads.MyClassifier.java
public Instances readIntancesVecto(String URL) throws Exception {
    DataSource source = new DataSource(URL);
    Instances instances = source.getDataSet();
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }
    return instances;
}
From source file:farm_ads.MyClassifier.java
public String ClassifyInstance(Classifier c, String instance) throws Exception {
    String format = "%4s %15s %15s\n";
    FarmAds fa = new FarmAds(instance, 1);
    FarmAdsVector fav = new FarmAdsVector();
    fav.writeFile("data\\dataVecto.dat", fa);
    DataSource source = new DataSource("data\\dataVecto.dat");
    Instances instances = source.getDataSet();
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }
    String s = "";
    s += "======= Ad classification results ========\n";
    s += String.format(format, "No.", "Actual", "Predicted");
    String[] classAds = { "Relevant", "Not relevant" };
    double actValue = instances.firstInstance().classValue();
    Instance newInst = instances.firstInstance();
    double pred = c.classifyInstance(newInst);
    s += String.format(format, Integer.toString(1), classAds[(int) actValue], classAds[(int) pred]);
    if (actValue == pred) {
        s += "\n\n ==> Correct prediction";
    } else {
        s += "\n\n ==> Wrong prediction";
    }
    return s;
}
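Tying the MyClassifier methods together, a hypothetical call site might look like the sketch below; the no-arg MyClassifier constructor, the SMO classifier choice, the vectorized file path, and the ad record content are all assumptions, not part of the source above:

// hypothetical usage sketch for MyClassifier; paths and classifier choice assumed
MyClassifier mc = new MyClassifier();                                  // constructor signature assumed
Instances train = mc.readIntancesVecto("data\\farmAdsVecto.dat");      // path assumed
weka.classifiers.Classifier c = new weka.classifiers.functions.SMO(); // classifier choice assumed
c.buildClassifier(train);
String adLine = "...";                                                 // one raw ad record, format defined by FarmAds
System.out.println(mc.ClassifyInstance(c, adLine));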