List of usage examples for the weka.core.Instances copy constructor
public Instances(Instances dataset)
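The copy constructor duplicates both the header (the attribute definitions) and all instances, so the copy can be modified without affecting the source. A minimal sketch of the two common forms; the class name and the file path "data.arff" are illustrative assumptions, not part of any example below:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CopyConstructorExample {
    public static void main(String[] args) throws Exception {
        // "data.arff" is an illustrative placeholder
        Instances original = DataSource.read("data.arff");
        original.setClassIndex(original.numAttributes() - 1);

        // full copy: duplicates the header and all instances; edits to the
        // copy (e.g. removing instances) leave the original untouched
        Instances copy = new Instances(original);
        copy.delete();

        // header-only copy: same attributes, zero instances; the idiomatic
        // alternative to a full copy followed by clear()/delete()
        Instances empty = new Instances(original, 0);

        System.out.println(original.numInstances() + " original, " + copy.numInstances()
            + " in cleared copy, " + empty.numInstances() + " in header-only copy");
    }
}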
From source file: de.ugoe.cs.cpdp.dataprocessing.Oversampling.java
License: Apache License

@Override
public void apply(Instances testdata, Instances traindata) {
    final int[] counts = traindata.attributeStats(traindata.classIndex()).nominalCounts;
    if (counts[1] < counts[0]) {
        // split the training data into negative (class 0) and positive (class 1) instances
        Instances negatives = new Instances(traindata);
        Instances positives = new Instances(traindata);
        for (int i = traindata.size() - 1; i >= 0; i--) {
            if (Double.compare(1.0, negatives.get(i).classValue()) == 0) {
                negatives.remove(i);
            }
            if (Double.compare(0.0, positives.get(i).classValue()) == 0) {
                positives.remove(i);
            }
        }
        // oversample the positives until both classes are balanced
        Resample resample = new Resample();
        resample.setSampleSizePercent((100.0 * counts[0]) / counts[1]);
        try {
            resample.setInputFormat(traindata);
            positives = Filter.useFilter(positives, resample);
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
        // rebuild the training data from the negatives and the oversampled positives
        traindata.clear();
        for (int i = 0; i < negatives.size(); i++) {
            traindata.add(negatives.get(i));
        }
        for (int i = 0; i < positives.size(); i++) {
            traindata.add(positives.get(i));
        }
    }
}
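Both class subsets above start as full copies of traindata and are then pruned instance by instance. A shorter equivalent, sketched here under the same assumption of a binary nominal class, uses the two-argument constructor to create empty header copies and fills them in a single pass:

import weka.core.Instances;

class ClassSplitSketch {
    // illustrative helper: split a dataset with a binary class into
    // negatives (class 0) and positives (class 1)
    static Instances[] splitByClass(Instances traindata) {
        Instances negatives = new Instances(traindata, 0); // header only
        Instances positives = new Instances(traindata, 0); // header only
        for (int i = 0; i < traindata.size(); i++) {
            if (traindata.get(i).classValue() == 1.0) {
                positives.add(traindata.get(i));
            } else {
                negatives.add(traindata.get(i));
            }
        }
        return new Instances[] { negatives, positives };
    }
}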
From source file: de.ugoe.cs.cpdp.dataprocessing.SimulationFilter.java
License: Apache License

@Override
public void apply(Instances testdata, Instances traindata) {
    Instances newDataSet = new Instances(traindata);
    traindata.delete();
    HashMap<Double, Instance> artifactNames = new HashMap<Double, Instance>();

    // this is to add all data where the first occurrence of the file has a bug
    ArrayList<Double> firstOccurenceArtifactNames = new ArrayList<Double>();

    // sort the dataset (the StateID is connected to the date of the commit: a lower
    // StateID means an earlier commit than a higher StateID)
    Attribute wekaAttribute = newDataSet.attribute("Artifact.Target.StateID");
    newDataSet.sort(wekaAttribute);

    /*
     * Logical summary: If there is an instance that does not have a bug, put it into the
     * hashmap (only unique values in there).
     *
     * If there is an instance that has a bug, look up whether it is already in the hashmap
     * (this means: it did not have a bug before). If so, add it to a new dataset and remove
     * it from the hashmap, so that new changes from "nonBug" -> "bug" for this file can be
     * found.
     *
     * If the instance has a bug and is not in the hashmap (this means: the file has a bug at
     * its first occurrence, or this file only has bugs and no instance without a bug), then
     * (if it is not in the array list above) add it to the new dataset. This way it is
     * possible to get the first occurrence of a file which has a bug.
     */
    for (int i = 0; i < newDataSet.numInstances(); i++) {
        Instance wekaInstance = newDataSet.instance(i);
        double newBugLabel = wekaInstance.classValue();
        Attribute wekaArtifactName = newDataSet.attribute("Artifact.Name");
        Double artifactName = wekaInstance.value(wekaArtifactName);

        // note: the first two branches are identical; any instance without a bug
        // (re)registers the artifact in the hashmap
        if (newBugLabel == 0.0 && artifactNames.keySet().contains(artifactName)) {
            artifactNames.put(artifactName, wekaInstance);
        }
        else if (newBugLabel == 0.0 && !artifactNames.keySet().contains(artifactName)) {
            artifactNames.put(artifactName, wekaInstance);
        }
        else if (newBugLabel == 1.0 && artifactNames.keySet().contains(artifactName)) {
            traindata.add(wekaInstance);
            artifactNames.remove(artifactName);
        }
        else if (newBugLabel == 1.0 && !artifactNames.keySet().contains(artifactName)) {
            if (!firstOccurenceArtifactNames.contains(artifactName)) {
                traindata.add(wekaInstance);
                firstOccurenceArtifactNames.add(artifactName);
            }
        }
    }

    // if we have a file that never had a bug (i.e., it is NOT in the newly created
    // dataset, but it is in the HashMap from above), add it to the new dataset
    double[] artifactNamesinNewDataSet = traindata.attributeToDoubleArray(0);
    HashMap<Double, Instance> artifactNamesCopy = new HashMap<Double, Instance>(artifactNames);
    for (Double artifactName : artifactNames.keySet()) {
        for (int i = 0; i < artifactNamesinNewDataSet.length; i++) {
            if (artifactNamesinNewDataSet[i] == artifactName) {
                artifactNamesCopy.remove(artifactName);
            }
        }
    }
    for (Double artifact : artifactNamesCopy.keySet()) {
        traindata.add(artifactNamesCopy.get(artifact));
    }
}
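The filter above relies on a copy-then-delete() idiom: because only the instance list is emptied while the header stays intact, a void method can rewrite the contents of the caller's dataset in place. A minimal sketch of the idiom; the keep(...) predicate is a hypothetical stand-in for the actual selection logic:

import weka.core.Instance;
import weka.core.Instances;

class InPlaceRewriteSketch {
    // hypothetical predicate standing in for the real selection logic
    static boolean keep(Instance inst) {
        return inst.classValue() == 0.0;
    }

    static void rewrite(Instances data) {
        Instances snapshot = new Instances(data); // full copy to iterate over
        data.delete();                            // empty the caller's dataset in place
        for (int i = 0; i < snapshot.numInstances(); i++) {
            if (keep(snapshot.instance(i))) {
                data.add(snapshot.instance(i));
            }
        }
    }
}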
From source file: de.ugoe.cs.cpdp.dataselection.CLIFF.java
License: Apache License

/**
 * <p>
 * Applies the CLIFF relevancy filter to the data.
 * </p>
 *
 * @param data
 *            the data
 * @return CLIFF-filtered data
 */
protected Instances applyCLIFF(Instances data) {
    final double[][] powerAttributes = new double[data.size()][data.numAttributes()];
    final double[] powerEntity = new double[data.size()];
    final int[] counts = data.attributeStats(data.classIndex()).nominalCounts;
    // note: this computes numInstances / counts[1], the reciprocal of the
    // relative frequency of the defect-prone class
    final double probDefect = data.numInstances() / (double) counts[1];

    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute()) {
            final double[] ranges = getRanges(data, j);
            final double[] probDefectRange = getRangeProbabilities(data, j, ranges);
            for (int i = 0; i < data.numInstances(); i++) {
                final double value = data.instance(i).value(j);
                final int range = determineRange(ranges, value);
                double probClass, probNotClass, probRangeClass, probRangeNotClass;
                if (data.instance(i).classValue() == 1) {
                    probClass = probDefect;
                    probNotClass = 1.0 - probDefect;
                    probRangeClass = probDefectRange[range];
                    probRangeNotClass = 1.0 - probDefectRange[range];
                }
                else {
                    probClass = 1.0 - probDefect;
                    probNotClass = probDefect;
                    probRangeClass = 1.0 - probDefectRange[range];
                    probRangeNotClass = probDefectRange[range];
                }
                powerAttributes[i][j] = Math.pow(probRangeClass, 2.0)
                    / (probRangeClass * probClass + probRangeNotClass * probNotClass);
            }
        }
    }

    // the power of an entity is the product of its attribute powers
    for (int i = 0; i < data.numInstances(); i++) {
        powerEntity[i] = 1.0;
        for (int j = 0; j < data.numAttributes(); j++) {
            powerEntity[i] *= powerAttributes[i][j];
        }
    }

    // the cutoff is chosen such that the top 'percentage' fraction of entities is kept
    double[] sortedPower = powerEntity.clone();
    Arrays.sort(sortedPower);
    double cutOff = sortedPower[(int) (data.numInstances() * (1 - percentage))];

    final Instances selected = new Instances(data); // copy header and data ...
    selected.delete();                              // ... then keep only the header
    for (int i = 0; i < data.numInstances(); i++) {
        if (powerEntity[i] >= cutOff) {
            selected.add(data.instance(i));
        }
    }
    return selected;
}
From source file: de.ugoe.cs.cpdp.dataselection.DBSCANFilter.java
License: Apache License

/**
 * @see de.ugoe.cs.cpdp.dataselection.PointWiseDataselectionStrategy#apply(weka.core.Instances,
 *      weka.core.Instances)
 */
@Override
public Instances apply(Instances testdata, Instances traindata) {
    Instances filteredTraindata = new Instances(traindata);
    filteredTraindata.clear();

    // build a joint feature matrix: test data first, then training data,
    // with the class attribute removed
    double[][] data = new double[testdata.size() + traindata.size()][testdata.numAttributes() - 1];
    int classIndex = testdata.classIndex();
    for (int i = 0; i < testdata.size(); i++) {
        int k = 0;
        for (int j = 0; j < testdata.numAttributes(); j++) {
            if (j != classIndex) {
                data[i][k] = testdata.get(i).value(j);
                k++;
            }
        }
    }
    for (int i = 0; i < traindata.size(); i++) {
        int k = 0;
        for (int j = 0; j < traindata.numAttributes(); j++) {
            if (j != classIndex) {
                data[i + testdata.size()][k] = traindata.get(i).value(j);
                k++;
            }
        }
    }

    DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
    Database db = new StaticArrayDatabase(dbc, null);
    db.initialize();
    // epsilon = 1.0, minPts = 10
    DBSCAN<DoubleVector> dbscan = new DBSCAN<DoubleVector>(EuclideanDistanceFunction.STATIC, 1.0, 10);
    Clustering<Model> clusterer = dbscan.run(db);
    Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    int firstInternalIndex = rel.iterDBIDs().internalGetIndex();

    for (Cluster<Model> cluster : clusterer.getAllClusters()) {
        // check if the cluster contains any test data (the first testdata.size()
        // DBIDs belong to the test instances)
        DBIDIter iter = rel.iterDBIDs();
        boolean noMatch = true;
        for (int i = 0; noMatch && i < testdata.size(); i++) {
            noMatch = !cluster.getIDs().contains(iter);
            iter.advance();
        }
        if (!noMatch) {
            // cluster contains test data; keep all training instances in it
            for (DBIDIter clusterIter = cluster.getIDs().iter(); clusterIter.valid(); clusterIter
                .advance()) {
                int internalIndex = clusterIter.internalGetIndex() - testdata.size() - firstInternalIndex;
                if (internalIndex >= 0) {
                    // index belongs to a training instance
                    filteredTraindata.add(traindata.get(internalIndex));
                }
            }
        }
    }
    return filteredTraindata;
}
From source file: de.ugoe.cs.cpdp.dataselection.LACE2.java
License: Apache License

@Override
public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
    Instances selectedData = new Instances(testdata);
    selectedData.clear();

    LinkedList<Instances> traindataCopy = new LinkedList<>(traindataSet);
    Collections.shuffle(traindataCopy);

    CLIFF cliff = new CLIFF();
    cliff.setParameter(Double.toString(percentage));
    MORPH morph = new MORPH();
    Median median = new Median();
    double minDist = Double.MIN_VALUE; // sentinel: not yet determined

    for (Instances traindata : traindataCopy) {
        Instances cliffedData = cliff.applyCLIFF(traindata);
        if (minDist == Double.MIN_VALUE) {
            // determine the distance for the leader-follower algorithm from a
            // sample of at most 100 instances
            Instances sample;
            if (traindata.size() > 100) {
                Resample resample = new Resample();
                resample.setSampleSizePercent(100.0 / traindata.size() * 100.0);
                resample.setBiasToUniformClass(0.0);
                resample.setNoReplacement(true);
                try {
                    resample.setInputFormat(traindata);
                    sample = Filter.useFilter(traindata, resample);
                }
                catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
            else {
                sample = new Instances(traindata);
            }
            double[] distances = new double[sample.size()];
            for (int i = 0; i < sample.size(); i++) {
                Instance unlikeNeighbor = morph.getNearestUnlikeNeighbor(sample.get(i), sample);
                distances[i] = MathArrays.distance(WekaUtils.instanceValues(sample.get(i)),
                                                   WekaUtils.instanceValues(unlikeNeighbor));
            }
            minDist = median.evaluate(distances);
        }
        for (int i = 0; i < cliffedData.size(); i++) {
            Instance unlikeNeighbor = morph.getNearestUnlikeNeighbor(cliffedData.get(i), selectedData);
            if (unlikeNeighbor == null) {
                selectedData.add(cliffedData.get(i));
            }
            else {
                double distance = MathArrays.distance(WekaUtils.instanceValues(cliffedData.get(i)),
                                                      WekaUtils.instanceValues(unlikeNeighbor));
                if (distance > minDist) {
                    morph.morphInstance(cliffedData.get(i), cliffedData);
                    selectedData.add(cliffedData.get(i));
                }
            }
        }
    }
}
From source file: de.ugoe.cs.cpdp.dataselection.NeighborhoodFilter.java
License: Apache License

/**
 * <p>
 * Applies the relevancy filter after Ryu et al.
 * </p>
 *
 * @param testdata
 *            test data
 * @param traindata
 *            training data
 * @return filtered training data
 */
private Instances applyNeighborhoodFilter(Instances testdata, Instances traindata) {
    TreeSet<Integer> selectedInstances = new TreeSet<>();
    for (int i = 0; i < testdata.size(); i++) {
        // first pass: find the minimal Hamming distance to the test instance
        double minHam = Double.MAX_VALUE;
        for (int j = 0; j < traindata.size(); j++) {
            double distance = WekaUtils.hammingDistance(testdata.get(i), traindata.get(j));
            if (distance < minHam) {
                minHam = distance;
            }
        }
        // second pass: select all training instances at that minimal distance
        for (int j = 0; j < traindata.size(); j++) {
            double distance = WekaUtils.hammingDistance(testdata.get(i), traindata.get(j));
            if (distance <= minHam) {
                selectedInstances.add(j);
            }
        }
    }
    Instances selectedTraindata = new Instances(testdata);
    selectedTraindata.clear();
    for (Integer index : selectedInstances) {
        selectedTraindata.add(traindata.instance(index));
    }
    return selectedTraindata;
}
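Note that the result set is copied from testdata but filled with traindata instances; this only works because both datasets share the same attributes. A defensive sketch of the same empty-copy step, assuming Weka 3.7+ where equalHeaders()/equalHeadersMsg() are available:

import weka.core.Instances;

class HeaderCheckedCopySketch {
    static Instances emptyCopyChecked(Instances testdata, Instances traindata) {
        // fail fast if the two datasets do not share the same attributes
        if (!testdata.equalHeaders(traindata)) {
            throw new IllegalArgumentException("test and training data must share the same header: "
                + testdata.equalHeadersMsg(traindata));
        }
        return new Instances(testdata, 0); // empty dataset with the shared header
    }
}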
From source file: de.ugoe.cs.cpdp.execution.AbstractCrossProjectExperiment.java
License: Apache License

/**
 * Helper method that combines a set of Weka {@link Instances} sets into a single
 * {@link Instances} set.
 *
 * @param traindataSet
 *            set of {@link Instances} to be combined
 * @return single {@link Instances} set
 */
public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
    Instances traindataFull = null;
    for (Instances traindata : traindataSet) {
        if (traindataFull == null) {
            // first set: copy it to get the header and its instances
            traindataFull = new Instances(traindata);
        }
        else {
            for (int i = 0; i < traindata.numInstances(); i++) {
                traindataFull.add(traindata.instance(i));
            }
        }
    }
    return traindataFull;
}
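A minimal usage sketch for this helper; versionA and versionB stand for two header-compatible datasets loaded elsewhere, and the import path assumes Apache Commons Collections 4:

import java.util.LinkedList;
import org.apache.commons.collections4.list.SetUniqueList;
import weka.core.Instances;

class CombineTrainingSetsSketch {
    static Instances combine(Instances versionA, Instances versionB) {
        SetUniqueList<Instances> trainSets =
            SetUniqueList.setUniqueList(new LinkedList<Instances>());
        trainSets.add(versionA);
        trainSets.add(versionB);
        return AbstractCrossProjectExperiment.makeSingleTrainingSet(trainSets);
    }
}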
From source file: de.ugoe.cs.cpdp.execution.ClassifierCreationExperiment.java
License: Apache License

/**
 * Executes the experiment with the steps as described in the class comment.
 *
 * @see Runnable#run()
 */
@Override
public void run() {
    final List<SoftwareVersion> versions = new LinkedList<>();
    boolean writeHeader = true;

    for (IVersionLoader loader : config.getLoaders()) {
        versions.addAll(loader.load());
    }

    File resultsDir = new File(config.getResultsPath());
    if (!resultsDir.exists()) {
        resultsDir.mkdir();
    }

    int versionCount = 1;
    for (SoftwareVersion testVersion : versions) {
        // at first: traindata == testdata
        Instances testdata = testVersion.getInstances();
        Instances traindata = new Instances(testdata);
        List<Double> efforts = testVersion.getEfforts();

        // give the dataset a new name
        testdata.setRelationName(testVersion.getProject());

        for (IProcessesingStrategy processor : config.getPreProcessors()) {
            Console.traceln(Level.FINE,
                            String.format("[%s] [%02d/%02d] %s: applying preprocessor %s",
                                          config.getExperimentName(), versionCount, versions.size(),
                                          testVersion.getProject(), processor.getClass().getName()));
            processor.apply(testdata, traindata);
        }

        for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors()) {
            Console.traceln(Level.FINE,
                            String.format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
                                          config.getExperimentName(), versionCount, versions.size(),
                                          testVersion.getProject(), dataselector.getClass().getName()));
            traindata = dataselector.apply(testdata, traindata);
        }

        for (IProcessesingStrategy processor : config.getPostProcessors()) {
            Console.traceln(Level.FINE,
                            String.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
                                          config.getExperimentName(), versionCount, versions.size(),
                                          testVersion.getProject(), processor.getClass().getName()));
            processor.apply(testdata, traindata);
        }

        // trainer list for the evaluation later on
        List<ITrainer> allTrainers = new LinkedList<>();
        for (ITrainingStrategy trainer : config.getTrainers()) {
            // add the trainer to the list for the evaluation
            allTrainers.add(trainer);

            // train the classifier
            trainer.apply(traindata);

            if (config.getSaveClassifier()) {
                // if the classifier should be saved, train and save it;
                // be careful with the typecast here!
                IWekaCompatibleTrainer trainerToSave = (IWekaCompatibleTrainer) trainer;
                try {
                    weka.core.SerializationHelper.write(resultsDir.getAbsolutePath() + "/"
                        + trainer.getName() + "-" + testVersion.getProject(),
                                                        trainerToSave.getClassifier());
                }
                catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }

        for (IEvaluationStrategy evaluator : config.getEvaluators()) {
            Console.traceln(Level.FINE,
                            String.format("[%s] [%02d/%02d] %s: applying evaluator %s",
                                          config.getExperimentName(), versionCount, versions.size(),
                                          testVersion.getProject(), evaluator.getClass().getName()));
            if (writeHeader) {
                evaluator.setParameter(config.getResultsPath() + "/" + config.getExperimentName()
                    + ".csv");
            }
            evaluator.apply(testdata, traindata, allTrainers, efforts, writeHeader,
                            config.getResultStorages());
            writeHeader = false;
        }

        Console.traceln(Level.INFO,
                        String.format("[%s] [%02d/%02d] %s: finished", config.getExperimentName(),
                                      versionCount, versions.size(), testVersion.getProject()));
        versionCount++;
    }
}
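A classifier saved via SerializationHelper.write(...) as above can later be restored with the matching read call; a minimal sketch, with an illustrative file name:

import weka.classifiers.Classifier;
import weka.core.SerializationHelper;

class LoadSavedClassifierSketch {
    public static void main(String[] args) throws Exception {
        // the path is an illustrative placeholder matching the write(...) call above
        Classifier restored = (Classifier) SerializationHelper.read("results/RandomForest-myProject");
        System.out.println(restored);
    }
}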
From source file: de.ugoe.cs.cpdp.execution.WithinProjectOrderedSplitExperiment.java
License: Apache License

/**
 * Executes the experiment with the steps as described in the class comment.
 *
 * @see Runnable#run()
 */
@Override
public void run() {
    final List<SoftwareVersion> versions = new LinkedList<>();
    for (IVersionLoader loader : config.getLoaders()) {
        versions.addAll(loader.load());
    }
    for (IVersionFilter filter : config.getVersionFilters()) {
        filter.apply(versions);
    }

    boolean writeHeader = true;
    int versionCount = 1;
    int testVersionCount = 0;
    int numTrainers = 0;
    for (SoftwareVersion testVersion : versions) {
        if (isVersion(testVersion, config.getTestVersionFilters())) {
            testVersionCount++;
        }
    }
    numTrainers += config.getSetWiseTrainers().size();
    numTrainers += config.getSetWiseTestdataAwareTrainers().size();
    numTrainers += config.getTrainers().size();
    numTrainers += config.getTestAwareTrainers().size();

    // sort versions
    Collections.sort(versions);

    for (SoftwareVersion testVersion : versions) {
        if (isVersion(testVersion, config.getTestVersionFilters())) {
            Console.traceln(Level.INFO,
                            String.format("[%s] [%02d/%02d] %s: starting",
                                          config.getExperimentName(), versionCount,
                                          testVersionCount, testVersion.getVersion()));
            int numResultsAvailable = resultsAvailable(testVersion);
            if (numResultsAvailable >= numTrainers * config.getRepetitions()) {
                Console.traceln(Level.INFO,
                                String.format("[%s] [%02d/%02d] %s: results already available; skipped",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion()));
                versionCount++;
                continue;
            }

            // set up test data and training data
            Instances testdata = testVersion.getInstances();
            List<Double> efforts = testVersion.getEfforts();

            // now split the data into parts
            double percentage = 0.5; // 0.5 as default value
            String param = config.getExecutionStrategyParameters();
            if (config.getExecutionStrategyParameters() != null) {
                try {
                    percentage = Double.parseDouble(param);
                }
                catch (NumberFormatException e) {
                    throw new RuntimeException("invalid execution strategy parameter, must be numeric: "
                        + param);
                }
            }
            int initialTestSize = testdata.size();
            // copy the version data, then split it: the chronologically first
            // 'percentage' part becomes the training data, the rest stays test data
            Instances traindata = new Instances(testdata);
            for (int i = initialTestSize - 1; i >= 0; i--) {
                if ((((double) i) / initialTestSize) < percentage) {
                    testdata.delete(i);
                    if (efforts != null) {
                        efforts.remove(i);
                    }
                }
                else {
                    traindata.delete(i);
                }
            }

            for (IProcessesingStrategy processor : config.getPreProcessors()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying preprocessor %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              processor.getClass().getName()));
                processor.apply(testdata, traindata);
            }
            for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              dataselector.getClass().getName()));
                traindata = dataselector.apply(testdata, traindata);
            }
            for (IProcessesingStrategy processor : config.getPostProcessors()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              processor.getClass().getName()));
                processor.apply(testdata, traindata);
            }
            for (ITrainingStrategy trainer : config.getTrainers()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying trainer %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              trainer.getName()));
                trainer.apply(traindata);
            }
            for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying trainer %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              trainer.getName()));
                trainer.apply(testdata, traindata);
            }

            File resultsDir = new File(config.getResultsPath());
            if (!resultsDir.exists()) {
                resultsDir.mkdir();
            }

            for (IEvaluationStrategy evaluator : config.getEvaluators()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying evaluator %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              evaluator.getClass().getName()));
                List<ITrainer> allTrainers = new LinkedList<>();
                for (ITrainingStrategy trainer : config.getTrainers()) {
                    allTrainers.add(trainer);
                }
                for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
                    allTrainers.add(trainer);
                }
                if (writeHeader) {
                    evaluator.setParameter(config.getResultsPath() + "/" + config.getExperimentName()
                        + ".csv");
                }
                evaluator.apply(testdata, traindata, allTrainers, efforts, writeHeader,
                                config.getResultStorages());
                writeHeader = false;
            }

            Console.traceln(Level.INFO,
                            String.format("[%s] [%02d/%02d] %s: finished",
                                          config.getExperimentName(), versionCount,
                                          testVersionCount, testVersion.getVersion()));
            versionCount++;
        }
    }
}
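The splitting loop above reads more easily when isolated: the chronologically earliest fraction of the version becomes the training data and the remainder stays test data. A sketch under the assumption that the dataset is sorted oldest-first (names illustrative):

import weka.core.Instances;

class OrderedSplitSketch {
    // chronological holdout: the first 'percentage' fraction becomes training
    // data, the remainder test data; assumes 'data' is sorted oldest-first
    static Instances[] split(Instances data, double percentage) {
        Instances traindata = new Instances(data, 0);
        Instances testdata = new Instances(data, 0);
        int n = data.numInstances();
        for (int i = 0; i < n; i++) {
            if (((double) i) / n < percentage) {
                traindata.add(data.instance(i));
            } else {
                testdata.add(data.instance(i));
            }
        }
        return new Instances[] { traindata, testdata };
    }
}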
From source file: de.ugoe.cs.cpdp.loader.DecentDataLoader.java
License: Apache License

/**
 * Loads the given decent file and transforms it from decent -> arffx -> arff
 *
 * @return Instances in WEKA format
 */
@Override
public Instances load(File file) {
    // set the attribute filter
    setAttributeFilter();

    // register the metamodels
    try {
        registerMetaModels();
    }
    catch (Exception e1) {
        Console.printerrln("Metamodels cannot be registered!");
        e1.printStackTrace();
    }

    // set the locations of the decent and arffx models
    String decentModelLocation = file.getAbsolutePath();
    String pathToDecentModelFolder =
        decentModelLocation.substring(0, decentModelLocation.lastIndexOf(File.separator));
    String arffxModelLocation = pathToDecentModelFolder + "/model.arffx";
    String logModelLocation = pathToDecentModelFolder + "/model.log";
    String arffLocation = pathToDecentModelFolder + "/model.arff";

    // if an arff file exists, load from it!
    if (new File(arffLocation).exists()) {
        System.out.println("Loading arff File...");
        BufferedReader reader;
        Instances data = null;
        try {
            reader = new BufferedReader(new FileReader(arffLocation));
            data = new Instances(reader);
            reader.close();
        }
        catch (FileNotFoundException e) {
            Console.printerrln("File with path: " + arffLocation + " was not found.");
            throw new RuntimeException(e);
        }
        catch (IOException e) {
            Console.printerrln("File with path: " + arffLocation + " cannot be read.");
            throw new RuntimeException(e);
        }

        // set the class attribute if it is not set yet
        if (data.classIndex() == -1) {
            Attribute classAttribute = data.attribute(classAttributeName);
            data.setClass(classAttribute);
        }
        return data;
    }

    // locations of the EOL scripts
    String preprocess = "./decent/epsilon/query/preprocess.eol";
    String arffxToArffSource = "./decent/epsilon/query/addLabels.eol";

    // set the log properties
    System.setProperty("epsilon.logLevel", logLevel);
    System.setProperty("epsilon.logToFile", logToFile);
    System.setProperty("epsilon.logFileAvailable", "false");

    // set the decent2arffx properties
    System.setProperty("epsilon.transformation.decent2arffx.skipSource", "false");
    System.setProperty("epsilon.transformation.decent2arffx.type", "code");

    // preprocess the data, transform from decent2arffx
    try {
        IEolExecutableModule preProcessModule = loadModule(preprocess);
        IModel preProcessDecentModel = modelHandler.getDECENTModel(decentModelLocation, true, true);
        IModel preProcessArffxarffxModel = modelHandler.getARFFxModel(arffxModelLocation, false, true);
        preProcessModule.getContext().getModelRepository().addModel(preProcessDecentModel);
        preProcessModule.getContext().getModelRepository().addModel(preProcessArffxarffxModel);
        execute(preProcessModule, logModelLocation);
        preProcessDecentModel.dispose();
        preProcessArffxarffxModel.dispose();
        preProcessModule.reset();
    }
    catch (URISyntaxException e) {
        Console.printerrln("URI Syntax for decent or arffx model is wrong.");
        e.printStackTrace();
    }
    catch (Exception e) {
        e.printStackTrace();
    }

    // transform to arff, for the label and confidence attributes
    try {
        IEolExecutableModule arffxToArffModule = loadModule(arffxToArffSource);
        IModel arffxToArffArffxModel = modelHandler.getARFFxModel(arffxModelLocation, true, true);
        arffxToArffModule.getContext().getModelRepository().addModel(arffxToArffArffxModel);
        execute(arffxToArffModule, logModelLocation);
        arffxToArffArffxModel.dispose(); // can be stored and retained alternatively
        arffxToArffModule.reset();
    }
    catch (URISyntaxException e) {
        Console.printerrln("URI Syntax for arffx model is wrong.");
        e.printStackTrace();
    }
    catch (Exception e) {
        e.printStackTrace();
    }

    // unregister the metamodels, otherwise the cast will fail
    HashMap<String, Object> metaModelCache = new HashMap<>();
    for (String key : EPackage.Registry.INSTANCE.keySet()) {
        metaModelCache.put(key, EPackage.Registry.INSTANCE.get(key));
    }
    for (String key : metaModelCache.keySet()) {
        EPackage.Registry.INSTANCE.remove(key);
    }

    // workaround to generate a usable URI; an absolute path is not possible,
    // therefore we need to construct a relative path
    URL location = DecentDataLoader.class.getProtectionDomain().getCodeSource().getLocation();
    String basePath = location.getFile();
    // the location is the bin folder, so we need to delete the last 4 characters
    basePath = basePath.substring(0, basePath.length() - 4);
    String relativePath =
        new File(basePath).toURI().relativize(new File(arffxModelLocation).toURI()).getPath();

    // load the arffx file and create the WEKA Instances
    ARFFxResourceTool tool = new ARFFxResourceTool();
    Resource resource = tool.loadResourceFromXMI(relativePath, "arffx");
    Instances dataSet = null;
    for (EObject o : resource.getContents()) {
        Model m = (Model) o;
        dataSet = createWekaDataFormat(m);
        for (Instance i : m.getData()) {
            createWekaInstance(dataSet, i);
        }
    }

    // set the class attribute
    Attribute classAttribute = dataSet.attribute(classAttributeName);
    dataSet.setClass(classAttribute);

    // save as ARFF
    save(dataSet, arffLocation);

    return dataSet;
}
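The ARFF shortcut branch above reads the file through the Instances(Reader) constructor; Weka's converter utilities offer a more compact route that also handles other formats. A minimal sketch, assuming Weka 3.7+ and an illustrative file path:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

class ArffLoadSketch {
    public static void main(String[] args) throws Exception {
        // "model.arff" is an illustrative placeholder path
        Instances data = DataSource.read("model.arff");
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1); // assume the class is the last attribute
        }
        System.out.println(data.numInstances() + " instances loaded");
    }
}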