List of usage examples for weka.core Instances sort
public void sort(Attribute att)
From source file:adams.flow.transformer.WekaSubsets.java
License:Open Source License
/** * Executes the flow item./*from ww w . j a v a 2 s . c o m*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances data; Double old; Double curr; int i; int index; Instance inst; result = null; m_Queue.clear(); // copy and sort data data = new Instances((Instances) m_InputToken.getPayload()); m_Index.setData(data); ; index = m_Index.getIntIndex(); data.sort(index); // create subsets old = null; i = 0; while (i < data.numInstances()) { inst = data.instance(i); curr = inst.value(index); if ((old == null) || !curr.equals(old)) { m_Queue.add(new Instances(data, data.numInstances())); old = curr; } m_Queue.get(m_Queue.size() - 1).add(inst); i++; } // compact subsets for (Instances sub : m_Queue) sub.compactify(); return result; }
From source file:adams.gui.visualization.instance.LoadDatasetDialog.java
License:Open Source License
/**
 * Returns a copy of the full dataset, sorted and with the class index set
 * according to the current selections of the sorting and class comboboxes.
 *
 * @return the full dataset, null if none loaded
 */
public Instances getDataset() {
  Instances result;
  int sortSelection;
  int classIndex;

  // Nothing loaded: return null as documented instead of failing with a
  // NullPointerException inside the Instances copy constructor.
  if (m_Instances == null)
    return null;

  result = new Instances(m_Instances);

  // Selection n > 0 sorts on attribute n - 1; selection 0 (or no selection)
  // leaves the order unchanged -- presumably a "no sorting" entry.
  sortSelection = m_ComboBoxSorting.getSelectedIndex();
  if (sortSelection > 0)
    result.sort(sortSelection - 1);

  // Same offset for the class combobox: selection 0 maps to class index -1
  // (no class attribute), selection n > 0 to attribute n - 1.
  classIndex = m_ComboBoxClass.getSelectedIndex();
  if (classIndex > -1)
    classIndex--;
  result.setClassIndex(classIndex);

  return result;
}
From source file:de.ugoe.cs.cpdp.dataprocessing.SimulationFilter.java
License:Apache License
@Override public void apply(Instances testdata, Instances traindata) { Instances newDataSet = new Instances(traindata); traindata.delete();/* w ww . ja va 2s .co m*/ HashMap<Double, Instance> artifactNames = new HashMap<Double, Instance>(); // This is to add all data, where the first occurence of the file has a bug ArrayList<Double> firstOccurenceArtifactNames = new ArrayList<Double>(); // Sort dataset (StateID is connected to the date of commit: Lower StateID // means earlier commit than a higher stateID) Attribute wekaAttribute = newDataSet.attribute("Artifact.Target.StateID"); newDataSet.sort(wekaAttribute); /* * Logical summary: If there is an instance that dont have a bug, put it into the hashmap * (only unique values in there) * * If there is an instance, that hava a bug look up if it is in the hashmap already (this * means: it does not had a bug before!): If this is true add it to a new dataset and remove * it from the hashmap, so that new changes from "nonBug" -> "bug" for this file can be * found. * * If the instance has a bug and is not in the hashmap (this means: The file has a bug with * its first occurence or this file only has bugs and not an instance with no bug), then (if * it is not in the arrayList above) add it to the new dataset. 
This way it is possible to * get the first occurence of a file, which has a bug */ for (int i = 0; i < newDataSet.numInstances(); i++) { Instance wekaInstance = newDataSet.instance(i); double newBugLabel = wekaInstance.classValue(); Attribute wekaArtifactName = newDataSet.attribute("Artifact.Name"); Double artifactName = wekaInstance.value(wekaArtifactName); if (newBugLabel == 0.0 && artifactNames.keySet().contains(artifactName)) { artifactNames.put(artifactName, wekaInstance); } else if (newBugLabel == 0.0 && !artifactNames.keySet().contains(artifactName)) { artifactNames.put(artifactName, wekaInstance); } else if (newBugLabel == 1.0 && artifactNames.keySet().contains(artifactName)) { traindata.add(wekaInstance); artifactNames.remove(artifactName); } else if (newBugLabel == 1.0 && !artifactNames.keySet().contains(artifactName)) { if (!firstOccurenceArtifactNames.contains(artifactName)) { traindata.add(wekaInstance); firstOccurenceArtifactNames.add(artifactName); } } } // If we have a file, that never had a bug (this is, when it is NOT in the // new created dataset, but it is in the HashMap from above) add it to // the new dataset double[] artifactNamesinNewDataSet = traindata.attributeToDoubleArray(0); HashMap<Double, Instance> artifactNamesCopy = new HashMap<Double, Instance>(artifactNames); for (Double artifactName : artifactNames.keySet()) { for (int i = 0; i < artifactNamesinNewDataSet.length; i++) { if (artifactNamesinNewDataSet[i] == artifactName) { artifactNamesCopy.remove(artifactName); } } } for (Double artifact : artifactNamesCopy.keySet()) { traindata.add(artifactNamesCopy.get(artifact)); } }
From source file:Helper.CustomFilter.java
/**
 * Walks over every attribute except the last one and, for each attribute
 * whose type string is "numeric", sorts the data on that attribute and
 * replaces the dataset with the result of {@code toRange} for it.
 *
 * @param structure the dataset to convert; it is sorted in place
 * @return the dataset returned by the last {@code toRange} call, or
 *         {@code structure} itself if no attribute was converted
 * @throws Exception if the conversion fails
 */
public Instances convertNumericRange(Instances structure) throws Exception {
    int position = 0;
    // the bound is re-read every round because structure may be replaced
    while (position < structure.numAttributes() - 1) {
        String typeName = structure.attribute(position).typeToString(structure.attribute(position));
        if ("numeric".equals(typeName)) {
            structure.sort(position);
            structure = toRange(structure, position);
        }
        position++;
    }
    return structure;
}
From source file:j48.BinC45Split.java
License:Open Source License
/** * Creates a C4.5-type split on the given data. * * @exception Exception if something goes wrong *//* w w w . j a va 2 s. c om*/ public void buildClassifier(Instances trainInstances) throws Exception { // Initialize the remaining instance variables. m_numSubsets = 0; m_splitPoint = Double.MAX_VALUE; m_infoGain = 0; m_gainRatio = 0; // Different treatment for enumerated and numeric // attributes. if (trainInstances.attribute(m_attIndex).isNominal()) { handleEnumeratedAttribute(trainInstances); } else { trainInstances.sort(trainInstances.attribute(m_attIndex)); handleNumericAttribute(trainInstances); } }
From source file:j48.C45Split.java
License:Open Source License
public void buildClassifier(Instances trainInstances) throws Exception { // Initialize the remaining instance variables. m_numSubsets = 0;/*w w w . j av a 2 s . c o m*/ m_splitPoint = Double.MAX_VALUE; m_infoGain = 0; m_gainRatio = 0; // Different treatment for enumerated and numeric // attributes. if (trainInstances.attribute(m_attIndex).isNominal()) { m_complexityIndex = trainInstances.attribute(m_attIndex).numValues(); m_index = m_complexityIndex; handleEnumeratedAttribute(trainInstances); } else { m_complexityIndex = 2; m_index = 0; trainInstances.sort(trainInstances.attribute(m_attIndex)); // /////////////////////////////////////////////////////////////////////////////////////// double stdDev = trainInstances.attributeStats(m_attIndex).numericStats.stdDev; if (stdDev > 200) { // rrrrr = stdDev/200; // System.out.println(stdDev+" "); rrrrr = Math.log10(stdDev) / 1.2; // rrrrr = 1.1; // lllll = stdDev/2000; // lllll = 0.3; lllll = Math.log10(stdDev) / 8; } else { lllll = Math.log10(stdDev) / 1.2; // lllll = stdDev/200; // lllll = 1.1; // rrrrr = stdDev/2000; // rrrrr = 0.3; rrrrr = Math.log10(stdDev) / 8; } handleNumericAttribute(trainInstances); } }
From source file:j48.NBTreeSplit.java
License:Open Source License
/** * Creates a NBTree-type split on the given data. Assumes that none of * the class values is missing./*from ww w . ja v a 2s. c o m*/ * * @exception Exception if something goes wrong */ public void buildClassifier(Instances trainInstances) throws Exception { // Initialize the remaining instance variables. m_numSubsets = 0; m_splitPoint = Double.MAX_VALUE; m_errors = 0; if (m_globalNB != null) { m_errors = m_globalNB.getErrors(); } // Different treatment for enumerated and numeric // attributes. if (trainInstances.attribute(m_attIndex).isNominal()) { m_complexityIndex = trainInstances.attribute(m_attIndex).numValues(); handleEnumeratedAttribute(trainInstances); } else { m_complexityIndex = 2; trainInstances.sort(trainInstances.attribute(m_attIndex)); handleNumericAttribute(trainInstances); } }
From source file:jjj.asap.sas.ensemble.impl.CrossValidatedEnsemble.java
License:Open Source License
@Override public StrongLearner build(int essaySet, String ensembleName, List<WeakLearner> learners) { // can't handle empty case if (learners.isEmpty()) { return this.ensemble.build(essaySet, ensembleName, learners); }//from w ww . j a v a2s . com // create a dummy dataset. DatasetBuilder builder = new DatasetBuilder(); builder.addVariable("id"); builder.addNominalVariable("class", Contest.getRubrics(essaySet)); Instances dummy = builder.getDataset("dummy"); // add data Map<Double, Double> groundTruth = Contest.getGoldStandard(essaySet); for (double id : learners.get(0).getPreds().keySet()) { dummy.add(new DenseInstance(1.0, new double[] { id, groundTruth.get(id) })); } // stratify dummy.sort(0); dummy.randomize(new Random(1)); dummy.setClassIndex(1); dummy.stratify(nFolds); // now evaluate each fold Map<Double, Double> preds = new HashMap<Double, Double>(); for (int k = 0; k < nFolds; k++) { Instances train = dummy.trainCV(nFolds, k); Instances test = dummy.testCV(nFolds, k); List<WeakLearner> cvLeaners = new ArrayList<WeakLearner>(); for (WeakLearner learner : learners) { WeakLearner copy = learner.copyOf(); for (int i = 0; i < test.numInstances(); i++) { copy.getPreds().remove(test.instance(i).value(0)); copy.getProbs().remove(test.instance(i).value(0)); } cvLeaners.add(copy); } // train on fold StrongLearner cv = this.ensemble.build(essaySet, ensembleName, cvLeaners); List<WeakLearner> testLeaners = new ArrayList<WeakLearner>(); for (WeakLearner learner : cv.getLearners()) { WeakLearner copy = learner.copyOf(); copy.getPreds().clear(); copy.getProbs().clear(); WeakLearner source = find(copy.getName(), learners); for (int i = 0; i < test.numInstances(); i++) { double id = test.instance(i).value(0); copy.getPreds().put(id, source.getPreds().get(id)); copy.getProbs().put(id, source.getProbs().get(id)); } testLeaners.add(copy); } preds.putAll(this.ensemble.classify(essaySet, ensembleName, testLeaners, cv.getContext())); } // now prepare final result StrongLearner 
strong = this.ensemble.build(essaySet, ensembleName, learners); double trainingError = strong.getKappa(); double cvError = Calc.kappa(essaySet, preds, groundTruth); // Job.log(essaySet+"-"+ensembleName, "XVAL: training error = " + trainingError + " cv error = " + cvError); strong.setKappa(cvError); return strong; }
From source file:lu.lippmann.cdb.datasetview.tasks.SortInstancesTask.java
License:Open Source License
/**
 * {@inheritDoc}
 *
 * Asks the user to pick one of the dataset's attribute names and sorts the
 * dataset in place on that attribute; a null result from the dialog leaves
 * the dataset untouched.
 */
@Override
Instances process0(final Instances dataSet) throws Exception {
    final Object[] attributeNames = WekaDataStatsUtil.getAttributeNames(dataSet).toArray();

    final String chosen = (String) JOptionPane.showInputDialog(
            null,
            "Select an attribute:\n",
            "Attribute selection",
            JOptionPane.PLAIN_MESSAGE,
            null,
            attributeNames,
            "");

    // the dialog returned no selection: nothing to sort
    if (chosen == null) {
        return dataSet;
    }

    dataSet.sort(dataSet.attribute(chosen));
    return dataSet;
}
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
private Instances fillAllGaps(final Instances ds) throws Exception { Instances newds = new Instances(ds); final int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds); final String datename = newds.attribute(firstDateIdx).name(); if (firstDateIdx == -1) { throw new Exception("No date attribute in this dataset!"); }//from w w w . j a va2 s .c o m /* add a 'fake numerical' time field */ newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes()); for (int i = 0; i < newds.numInstances(); i++) { newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(firstDateIdx)); } /* remove the 'true' time field */ newds.deleteAttributeAt(firstDateIdx); /* process the dataset */ newds = fillGaps0(newds); /* re-add the 'true' time field according to the 'fake numerical' time field */ final String df = ds.attribute(firstDateIdx).getDateFormat(); newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes()); for (int i = 0; i < newds.numInstances(); i++) { newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(newds.numAttributes() - 2)); } /* delete the 'fake numerical' time field */ newds.deleteAttributeAt(newds.numAttributes() - 2); newds.sort(newds.numAttributes() - 1); return newds; }