List of usage examples for the weka.core.Instances constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
From source file:de.ugoe.cs.cpdp.loader.AUDIChangeLoader.java
License:Apache License
@Override public Instances load(File file) { final String[] lines; String[] lineSplit;//from ww w.j ava 2 s .c om String[] lineSplitBug; try { lines = FileTools.getLinesFromFile(file.getAbsolutePath()); } catch (IOException e) { throw new RuntimeException(e); } // information about bugs are in another file String path = file.getAbsolutePath(); path = path.substring(0, path.length() - 14) + "repro.csv"; final String[] linesBug; try { linesBug = FileTools.getLinesFromFile(path); } catch (IOException e) { throw new RuntimeException(e); } int revisionIndex = -1; int bugIndex = -1; lineSplitBug = linesBug[0].split(";"); for (int j = 0; j < lineSplitBug.length; j++) { if (lineSplitBug[j].equals("svnrev")) { revisionIndex = j; } if (lineSplitBug[j].equals("num_bugs_trace")) { bugIndex = j; } } if (revisionIndex < 0) { throw new RuntimeException("could not find SVN revisions"); } if (bugIndex < 0) { throw new RuntimeException("could not find bug information"); } int metricsStartIndex = -1; int metricsEndIndex = -1; lineSplit = lines[0].split(";"); for (int j = 0; j < lineSplit.length; j++) { if (lineSplit[j].equals("lm_LOC")) { metricsStartIndex = j; } if (lineSplit[j].equals("h_E")) { metricsEndIndex = j; } } if (metricsStartIndex < 0) { throw new RuntimeException("could not find first metric, i.e., lm_LOC"); } if (metricsEndIndex < 0) { throw new RuntimeException("could not find last metric, i.e., h_E"); } int numMetrics = metricsEndIndex - metricsStartIndex + 1; // create sets of all filenames and revisions SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>(); for (int i = 1; i < linesBug.length; i++) { lineSplitBug = linesBug[i].split(";"); entityRevisionPairs .put(new EntityRevisionPair(lineSplitBug[0], Integer.parseInt(lineSplitBug[revisionIndex])), i); } // prepare weka instances final ArrayList<Attribute> atts = new ArrayList<Attribute>(); lineSplit = lines[0].split(";"); for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { 
atts.add(new Attribute(lineSplit[j] + "_delta")); } for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { atts.add(new Attribute(lineSplit[j] + "_abs")); } final ArrayList<String> classAttVals = new ArrayList<String>(); classAttVals.add("0"); classAttVals.add("1"); final Attribute classAtt = new Attribute("bug", classAttVals); atts.add(classAtt); final Instances data = new Instances(file.getName(), atts, 0); data.setClass(classAtt); // create data String lastFile = null; double[] lastValues = null; int lastNumBugs = 0; for (Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet()) { try { // first get values lineSplit = lines[entry.getValue()].split(";"); lineSplitBug = linesBug[entry.getValue()].split(";"); int i = 0; double[] values = new double[numMetrics]; for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { values[i] = Double.parseDouble(lineSplit[j]); i++; } int numBugs = Integer.parseInt(lineSplitBug[bugIndex]); // then check if an entity must be created if (entry.getKey().entity.equals(lastFile)) { // create new instance double[] instanceValues = new double[2 * numMetrics + 1]; for (int j = 0; j < numMetrics; j++) { instanceValues[j] = values[j] - lastValues[j]; instanceValues[j + numMetrics] = values[j]; } // check if any value>0 boolean changeOccured = false; for (int j = 0; j < numMetrics; j++) { if (instanceValues[j] > 0) { changeOccured = true; } } if (changeOccured) { instanceValues[instanceValues.length - 1] = numBugs <= lastNumBugs ? 0 : 1; data.add(new DenseInstance(1.0, instanceValues)); } } lastFile = entry.getKey().entity; lastValues = values; lastNumBugs = numBugs; } catch (IllegalArgumentException e) { System.err.println("error in line " + entry.getValue() + ": " + e.getMessage()); System.err.println("metrics line: " + lines[entry.getValue()]); System.err.println("bugs line: " + linesBug[entry.getValue()]); System.err.println("line is ignored"); } } return data; }
From source file:de.ugoe.cs.cpdp.loader.AUDIChangeLoader.java
License:Apache License
public Instances load(File file, String dummy) { final String[] lines; try {/*from w ww . j a va 2 s.co m*/ lines = FileTools.getLinesFromFile(file.getAbsolutePath()); } catch (IOException e) { throw new RuntimeException(e); } // information about bugs are in another file String path = file.getAbsolutePath(); path = path.substring(0, path.length() - 14) + "repro.csv"; final String[] linesBug; try { linesBug = FileTools.getLinesFromFile(path); } catch (IOException e) { throw new RuntimeException(e); } // configure Instances final ArrayList<Attribute> atts = new ArrayList<Attribute>(); String[] lineSplit = lines[0].split(";"); // ignore first three/four and last two columns int offset; if (lineSplit[3].equals("project_rev")) { offset = 4; } else { offset = 3; } for (int j = 0; j < lineSplit.length - (offset + 2); j++) { atts.add(new Attribute(lineSplit[j + offset])); } final ArrayList<String> classAttVals = new ArrayList<String>(); classAttVals.add("0"); classAttVals.add("1"); final Attribute classAtt = new Attribute("bug", classAttVals); atts.add(classAtt); final Instances data = new Instances(file.getName(), atts, 0); data.setClass(classAtt); // fetch data for (int i = 1; i < lines.length; i++) { boolean validInstance = true; lineSplit = lines[i].split(";"); String[] lineSplitBug = linesBug[i].split(";"); double[] values = new double[data.numAttributes()]; for (int j = 0; validInstance && j < values.length - 1; j++) { if (lineSplit[j + offset].trim().isEmpty()) { validInstance = false; } else { values[j] = Double.parseDouble(lineSplit[j + offset].trim()); } } if (offset == 3) { values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1; } else { values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1; } if (validInstance) { data.add(new DenseInstance(1.0, values)); } else { System.out.println("instance " + i + " is invalid"); } } return data; }
From source file:de.ugoe.cs.cpdp.loader.AUDIDataLoader.java
License:Apache License
@Override public Instances load(File file) { final String[] lines; try {/*from w w w. j av a 2 s . co m*/ lines = FileTools.getLinesFromFile(file.getAbsolutePath()); } catch (IOException e) { throw new RuntimeException(e); } // information about bugs are in another file String path = file.getAbsolutePath(); path = path.substring(0, path.length() - 14) + "repro.csv"; final String[] linesBug; try { linesBug = FileTools.getLinesFromFile(path); } catch (IOException e) { throw new RuntimeException(e); } // configure Instances final ArrayList<Attribute> atts = new ArrayList<Attribute>(); String[] lineSplit = lines[0].split(";"); // ignore first three/four and last two columns int offset; if (lineSplit[3].equals("project_rev")) { offset = 4; } else { offset = 3; } for (int j = 0; j < lineSplit.length - (offset + 2); j++) { atts.add(new Attribute(lineSplit[j + offset])); } final ArrayList<String> classAttVals = new ArrayList<String>(); classAttVals.add("0"); classAttVals.add("1"); final Attribute classAtt = new Attribute("bug", classAttVals); atts.add(classAtt); final Instances data = new Instances(file.getName(), atts, 0); data.setClass(classAtt); // fetch data for (int i = 1; i < lines.length; i++) { boolean validInstance = true; lineSplit = lines[i].split(";"); String[] lineSplitBug = linesBug[i].split(";"); double[] values = new double[data.numAttributes()]; for (int j = 0; validInstance && j < values.length - 1; j++) { if (lineSplit[j + offset].trim().isEmpty()) { validInstance = false; } else { values[j] = Double.parseDouble(lineSplit[j + offset].trim()); } } if (offset == 3) { values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1; } else { values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1; } if (validInstance) { data.add(new DenseInstance(1.0, values)); } else { System.out.println("instance " + i + " is invalid"); } } return data; }
From source file:de.ugoe.cs.cpdp.loader.CSVMockusDataLoader.java
License:Apache License
@Override public Instances load(File file) { final String[] lines; try {//from w ww. j av a2s . c o m lines = FileTools.getLinesFromFile(file.getAbsolutePath()); } catch (IOException e) { throw new RuntimeException(e); } // configure Instances final ArrayList<Attribute> atts = new ArrayList<Attribute>(); String[] lineSplit = lines[0].split(","); for (int j = 0; j < lineSplit.length - 3; j++) { atts.add(new Attribute(lineSplit[j + 2])); } final ArrayList<String> classAttVals = new ArrayList<String>(); classAttVals.add("0"); classAttVals.add("1"); final Attribute classAtt = new Attribute("bug", classAttVals); atts.add(classAtt); final Instances data = new Instances(file.getName(), atts, 0); data.setClass(classAtt); // fetch data for (int i = 1; i < lines.length; i++) { lineSplit = lines[i].split(","); double[] values = new double[lineSplit.length - 2]; for (int j = 0; j < values.length - 1; j++) { values[j] = Double.parseDouble(lineSplit[j + 2].trim()); } values[values.length - 1] = lineSplit[lineSplit.length - 1].trim().equals("0") ? 0 : 1; data.add(new DenseInstance(1.0, values)); } return data; }
From source file:de.ugoe.cs.cpdp.loader.DecentDataLoader.java
License:Apache License
/** * Creates a Weka Instances set out of a arffx model * //from w w w . j a va 2s . c o m * @param m * arffx model * @return */ private Instances createWekaDataFormat(Model m) { // Bad solution, can be enhanced (continue in for loop) ArrayList<Attribute> datasetAttributes = new ArrayList<Attribute>(); for (de.ugoe.cs.cpdp.decentApp.models.arffx.Attribute attribute : m.getAttributes()) { String attributeName = attribute.getName(); if (attributeFilter.contains(attributeName)) { continue; } Attribute wekaAttr; // Is attribute a LABEL.* attribute? if (isLabel(attributeName)) { // Classattribute final ArrayList<String> classAttVals = new ArrayList<String>(); classAttVals.add("false"); classAttVals.add("true"); wekaAttr = new Attribute(attributeName, classAttVals); } else if (isConfidenceLabel(attributeName)) { // Is attribute a CONFIDENCE.* attribute? ArrayList<String> labels = new ArrayList<String>(); labels.add("high"); labels.add("low"); wekaAttr = new Attribute(attributeName, labels); } else { // Is it a numeric attribute? wekaAttr = new Attribute(attributeName); } datasetAttributes.add(wekaAttr); } return new Instances("test-dataset", datasetAttributes, 0); }
From source file:de.uniheidelberg.cl.swp.mlprocess.AblationTesting.java
License:Apache License
/**
 * Creates a new data set that contains every instance of {@code source}, reduced to the
 * attributes of the currently tested feature set.
 *
 * <p>For each source instance, the value of every target attribute is looked up by attribute
 * name through {@code getAttributeValue}.
 *
 * @param source The Instances object containing all the Instance objects from the source file.
 * @param targetStructure The attributes of the features which are currently being tested.
 * @return A new Instances object named {@code "ACResolution"} with one instance per source
 *         instance.
 */
private Instances copyInstances(Instances source, ArrayList<Attribute> targetStructure) {
    final Instances reduced = new Instances("ACResolution", targetStructure, 0);
    final int width = targetStructure.size();

    for (int row = 0; row < source.numInstances(); row++) {
        final Instance original = source.instance(row);
        final double[] projected = new double[width];
        int column = 0;
        for (Attribute wanted : targetStructure) {
            projected[column] = getAttributeValue(original, wanted.name());
            column++;
        }
        reduced.add(new DenseInstance(1.0, projected));
    }

    return reduced;
}
From source file:de.uniheidelberg.cl.swp.mlprocess.InstanceContainer.java
License:Apache License
/** * Takes a list of {@link AbstractFeatureExtractor} and adds the corresponding feature as a * WEKA Attribute to the Instances structure. * /* w ww.ja v a2 s . c om*/ * @param featureList The list of FeatureExtractors to be added. */ public void createAttributeStructure(List<AbstractFeatureExtractor> featureList) { if (this.instances != null) { return; } ArrayList<Attribute> wekaAttributes = new ArrayList<Attribute>(featureList.size()); for (AbstractFeatureExtractor fe : featureList) { wekaAttributes.add(fe.getWekaAttribute()); } /* Add the ACR-Systems as feature */ List<String> acrFeature = new ArrayList<String>(); acrFeature.add(JavaRapRunner.class.getCanonicalName()); acrFeature.add(LingpipeRunner.class.getCanonicalName()); acrFeature.add(BARTRunner.class.getCanonicalName()); List<String> correctFalsePrediction = new ArrayList<String>(); /* Add the possible prediction values */ correctFalsePrediction.add("+"); correctFalsePrediction.add("-"); correctFalsePrediction.add("?"); wekaAttributes.add(new Attribute("acrSystem", acrFeature)); wekaAttributes.add(new Attribute("corretOutputBySystem", correctFalsePrediction)); this.instances = new Instances("ACResolution", wekaAttributes, 0); this.instances.setClassIndex(this.instances.numAttributes() - 1); }
From source file:de.uni_koeln.phil_fak.iv.tm.p4.classification.WekaAdapter.java
License:Open Source License
private Instances initTraininSet(Set<Document> trainingData) { /* Der FastVector enthlt die Merkmale: */ FastVector structureVector = new FastVector(vectorSize + 1); /* Die Klasse wird in Weka auch als Merkmalsvektor dargestellt: */ FastVector classesVector = new FastVector(this.classes.size()); for (String c : classes) { /*/*from w w w.j av a 2 s .c o m*/ * Die Klasse ist nicht numerisch, deshalb muessen alle mglichen * Werte angegeben werden: */ classesVector.addElement(c); } /* An Stelle 0 unseres Gesamtvektors kommt der Klassen-Vektor: */ structureVector.addElement(new Attribute("Ressort", classesVector)); for (int i = 0; i < vectorSize; i++) { /* * An jeder Position unseres Merkmalsvektors haben wir ein * numerisches Merkmal (reprsentiert als Attribute), dessen Name * sein Index ist: */ structureVector.addElement(new Attribute(i + "")); // Merkmal i, // d.h. was? > // TF-IDF } /* * Schliesslich erstellen wir einen Container fr unsere * Trainingsbeispiele, der Instanzen der beschriebenen Merkmale * enthalten wird: */ Instances result = new Instances("InstanceStructure", structureVector, vectorSize + 1); /* * Wobei wir noch angeben muessen, an welcher Stelle der * Merkmalsvektoren die Klasse zu finden ist: */ result.setClassIndex(0); return result; }
From source file:de.uni_koeln.spinfo.classification.zoneAnalysis.classifier.WekaClassifier.java
License:Open Source License
private Instances initTrainingSet(List<ClassifyUnit> trainingData) { int vectorSize = trainingData.get(0).getFeatureVector().length; Set<Integer> classIDs = new TreeSet<Integer>(); for (ClassifyUnit classifyUnit : trainingData) { ZoneClassifyUnit actual = (ZoneClassifyUnit) classifyUnit; classIDs.add(actual.getActualClassID()); }/*w ww. j a v a 2 s . c om*/ /* Der Vektor enthlt die numerischen Merkmale (bei uns: tf-idf-Werte) sowie ein Klassenattribut: */ ArrayList<Attribute> structureVector = new ArrayList<Attribute>(vectorSize + 1); /* Auch die Klasse wird in Weka als Vektor dargestellt: */ ArrayList<String> classesVector = new ArrayList<String>(); for (Integer c : classIDs) { /* * Da das Klassen-Attribut nicht numerisch ist (sondern, in Weka-Terminologie, ein nominales bzw. * String-Attribut), mssen hier alle mglichen Attributwerte angegeben werden: */ classesVector.add(c + ""); } /* An Stelle 0 unseres Strukturvektors kommt der Klassen-Vektor: */ structureVector.add(new Attribute("topic", classesVector)); for (int i = 0; i < vectorSize; i++) { /* * An jeder weiteren Position unseres Merkmalsvektors haben wir ein numerisches Merkmal (reprsentiert als * Attribute), dessen Name hier einfach seine Indexposition ist: */ structureVector.add(new Attribute(i + "")); // Merkmal i, d.h. was? > TF-IDF } /* * Schliesslich erstellen wir einen Container, der Instanzen in der hier beschriebenen Struktur enthalten wird * (also unsere Trainingsbeispiele): */ Instances result = new Instances("InstanceStructure", structureVector, vectorSize + 1); /* * Wobei wir hier erneut angeben muessen, an welcher Stelle der Merkmalsvektoren die Klasse zu finden ist: */ result.setClassIndex(0); return result; }
From source file:de.upb.timok.utils.DatasetTransformationUtils.java
License:Open Source License
public static Instances trainingSetToInstances(List<double[]> trainingSet) { final double[] sample = trainingSet.get(0); final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length + 1); for (int i = 0; i < sample.length; i++) { fvWekaAttributes.add(new Attribute(Integer.toString(i))); }/*from w ww. j a v a 2 s.c om*/ final ArrayList<String> classStrings = new ArrayList<>(); classStrings.add("normal"); final Attribute ClassAttribute = new Attribute("class", classStrings); // Declare the feature vector fvWekaAttributes.add(ClassAttribute); final Instances result = new Instances("trainingSet", fvWekaAttributes, trainingSet.size()); result.setClass(ClassAttribute); result.setClassIndex(fvWekaAttributes.size() - 1); for (final double[] instance : trainingSet) { final double[] newInstance = Arrays.copyOf(instance, instance.length + 1); newInstance[newInstance.length - 1] = 0; final Instance wekaInstance = new DenseInstance(1, newInstance); wekaInstance.setDataset(result); result.add(wekaInstance); } return result; }