Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find example usage for the weka.core Instances(Instances) constructor.

Prototype

public Instances(Instances dataset) 

Document

Constructor copying all instances and references to the header information from the given set of instances.
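
Before the project examples below, here is a minimal, self-contained sketch of the copy semantics (assuming Weka 3.7+, which provides the List-style API that the examples rely on; the dataset, attribute names, and class values are made up for illustration). The instances themselves are copied, the header information is shared by reference, and a recurring idiom in the examples is to copy and then clear in order to obtain an empty dataset with a compatible header:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InstancesCopyExample {

    public static void main(String[] args) {
        // build a small dataset: one numeric attribute and a nominal class attribute
        ArrayList<Attribute> attributes = new ArrayList<>();
        attributes.add(new Attribute("size"));
        ArrayList<String> classValues = new ArrayList<>();
        classValues.add("nonBug");
        classValues.add("bug");
        attributes.add(new Attribute("class", classValues));

        Instances data = new Instances("example", attributes, 0);
        data.setClassIndex(data.numAttributes() - 1);
        data.add(new DenseInstance(1.0, new double[] { 42.0, 1.0 }));

        // copy constructor: copies all instances, references the same header information
        Instances copy = new Instances(data);

        // recurring idiom in the examples below: copy, then clear, to obtain an
        // empty dataset that is structurally compatible with the original
        Instances emptyCompatible = new Instances(data);
        emptyCompatible.clear();

        System.out.println(copy.numInstances());            // 1
        System.out.println(emptyCompatible.numInstances()); // 0
        System.out.println(data.numInstances());            // still 1
    }
}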

Usage

From source file:de.ugoe.cs.cpdp.dataprocessing.Oversampling.java

License:Apache License

@Override
public void apply(Instances testdata, Instances traindata) {

    final int[] counts = traindata.attributeStats(traindata.classIndex()).nominalCounts;
    if (counts[1] < counts[0]) {
        Instances negatives = new Instances(traindata);
        Instances positives = new Instances(traindata);

        for (int i = traindata.size() - 1; i >= 0; i--) {
            if (Double.compare(1.0, negatives.get(i).classValue()) == 0) {
                negatives.remove(i);
            }
            if (Double.compare(0.0, positives.get(i).classValue()) == 0) {
                positives.remove(i);
            }
        }

        Resample resample = new Resample();
        // oversample the positives (minority class) until they match the number of negatives
        resample.setSampleSizePercent((100.0 * counts[0]) / counts[1]);
        try {
            resample.setInputFormat(traindata);
            positives = Filter.useFilter(positives, resample);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        traindata.clear();
        for (int i = 0; i < negatives.size(); i++) {
            traindata.add(negatives.get(i));
        }
        for (int i = 0; i < positives.size(); i++) {
            traindata.add(positives.get(i));
        }
    }
}

From source file:de.ugoe.cs.cpdp.dataprocessing.SimulationFilter.java

License:Apache License

@Override
public void apply(Instances testdata, Instances traindata) {
    Instances newDataSet = new Instances(traindata);
    traindata.delete();

    HashMap<Double, Instance> artifactNames = new HashMap<Double, Instance>();

    // This is to add all data where the first occurrence of the file has a bug
    ArrayList<Double> firstOccurenceArtifactNames = new ArrayList<Double>();

    // Sort the dataset (the StateID is connected to the date of the commit: a lower
    // StateID means an earlier commit than a higher StateID)
    Attribute wekaAttribute = newDataSet.attribute("Artifact.Target.StateID");
    newDataSet.sort(wekaAttribute);

    /*
     * Logical summary: If an instance does not have a bug, put it into the hash map
     * (only unique values are kept there).
     * 
     * If an instance has a bug, look up whether it is already in the hash map (which
     * means it did not have a bug before): if so, add it to the new dataset and remove
     * it from the hash map, so that new changes from "nonBug" -> "bug" for this file
     * can be found.
     * 
     * If the instance has a bug and is not in the hash map (which means the file has a
     * bug at its first occurrence, or the file only has bugs and no bug-free instance),
     * then add it to the new dataset unless it is already in the array list above. This
     * way it is possible to get the first occurrence of a file that has a bug.
     */
    for (int i = 0; i < newDataSet.numInstances(); i++) {
        Instance wekaInstance = newDataSet.instance(i);

        double newBugLabel = wekaInstance.classValue();
        Attribute wekaArtifactName = newDataSet.attribute("Artifact.Name");
        Double artifactName = wekaInstance.value(wekaArtifactName);

        if (newBugLabel == 0.0) {
            // bug-free instance: remember the latest state of this file
            artifactNames.put(artifactName, wekaInstance);
        } else if (newBugLabel == 1.0 && artifactNames.containsKey(artifactName)) {
            // the file did not have a bug before: keep this instance and remove the
            // entry so that later "nonBug" -> "bug" changes of this file are found
            traindata.add(wekaInstance);
            artifactNames.remove(artifactName);
        } else if (newBugLabel == 1.0 && !artifactNames.containsKey(artifactName)) {
            if (!firstOccurenceArtifactNames.contains(artifactName)) {
                // the first occurrence of this file already has a bug
                traindata.add(wekaInstance);
                firstOccurenceArtifactNames.add(artifactName);
            }
        }
    }

    // If a file never had a bug (i.e., it is NOT in the newly created dataset,
    // but it is still in the HashMap from above), add it to the new dataset

    double[] artifactNamesinNewDataSet = traindata.attributeToDoubleArray(0);
    HashMap<Double, Instance> artifactNamesCopy = new HashMap<Double, Instance>(artifactNames);

    for (Double artifactName : artifactNames.keySet()) {

        for (int i = 0; i < artifactNamesinNewDataSet.length; i++) {
            if (artifactNamesinNewDataSet[i] == artifactName) {
                artifactNamesCopy.remove(artifactName);
            }
        }
    }

    for (Double artifact : artifactNamesCopy.keySet()) {
        traindata.add(artifactNamesCopy.get(artifact));
    }

}

From source file:de.ugoe.cs.cpdp.dataselection.CLIFF.java

License:Apache License

/**
 * <p>
 * Applies the CLIFF relevancy filter to the data.
 * </p>
 *
 * @param data
 *            the data
 * @return CLIFF-filtered data
 */
protected Instances applyCLIFF(Instances data) {
    final double[][] powerAttributes = new double[data.size()][data.numAttributes()];
    final double[] powerEntity = new double[data.size()];

    final int[] counts = data.attributeStats(data.classIndex()).nominalCounts;
    final double probDefect = data.numInstances() / (double) counts[1];

    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute()) {
            final double[] ranges = getRanges(data, j);
            final double[] probDefectRange = getRangeProbabilities(data, j, ranges);

            for (int i = 0; i < data.numInstances(); i++) {
                final double value = data.instance(i).value(j);
                final int range = determineRange(ranges, value);
                double probClass, probNotClass, probRangeClass, probRangeNotClass;
                if (data.instance(i).classValue() == 1) {
                    probClass = probDefect;
                    probNotClass = 1.0 - probDefect;
                    probRangeClass = probDefectRange[range];
                    probRangeNotClass = 1.0 - probDefectRange[range];
                } else {
                    probClass = 1.0 - probDefect;
                    probNotClass = probDefect;
                    probRangeClass = 1.0 - probDefectRange[range];
                    probRangeNotClass = probDefectRange[range];
                }
                powerAttributes[i][j] = Math.pow(probRangeClass, 2.0)
                        / (probRangeClass * probClass + probRangeNotClass * probNotClass);
            }
        }
    }

    for (int i = 0; i < data.numInstances(); i++) {
        powerEntity[i] = 1.0;
        for (int j = 0; j < data.numAttributes(); j++) {
            powerEntity[i] *= powerAttributes[i][j];
        }
    }
    double[] sortedPower = powerEntity.clone();
    Arrays.sort(sortedPower);
    double cutOff = sortedPower[(int) (data.numInstances() * (1 - percentage))];

    final Instances selected = new Instances(data);
    selected.delete();
    for (int i = 0; i < data.numInstances(); i++) {
        if (powerEntity[i] >= cutOff) {
            selected.add(data.instance(i));
        }
    }
    return selected;
}

From source file:de.ugoe.cs.cpdp.dataselection.DBSCANFilter.java

License:Apache License

/**
 * @see de.ugoe.cs.cpdp.dataselection.PointWiseDataselectionStrategy#apply(weka.core.Instances,
 *      weka.core.Instances)
 */
@Override
public Instances apply(Instances testdata, Instances traindata) {
    Instances filteredTraindata = new Instances(traindata);
    filteredTraindata.clear();

    double[][] data = new double[testdata.size() + traindata.size()][testdata.numAttributes() - 1];
    int classIndex = testdata.classIndex();
    for (int i = 0; i < testdata.size(); i++) {
        int k = 0;
        for (int j = 0; j < testdata.numAttributes(); j++) {
            if (j != classIndex) {
                data[i][k] = testdata.get(i).value(j);
                k++;
            }
        }
    }
    for (int i = 0; i < traindata.size(); i++) {
        int k = 0;
        for (int j = 0; j < traindata.numAttributes(); j++) {
            if (j != classIndex) {
                data[i + testdata.size()][k] = traindata.get(i).value(j);
                k++;
            }
        }
    }
    DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
    Database db = new StaticArrayDatabase(dbc, null);
    db.initialize();
    DBSCAN<DoubleVector> dbscan = new DBSCAN<DoubleVector>(EuclideanDistanceFunction.STATIC, 1.0, 10);
    Clustering<Model> clusterer = dbscan.run(db);
    Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    int firstInternalIndex = rel.iterDBIDs().internalGetIndex();

    for (Cluster<Model> cluster : clusterer.getAllClusters()) {
        // check if the cluster contains any test data
        DBIDIter iter = rel.iterDBIDs();
        boolean noMatch = true;
        for (int i = 0; noMatch && i < testdata.size(); i++) {
            noMatch = !cluster.getIDs().contains(iter);
            iter.advance();
        }
        if (!noMatch) {
            // cluster contains test data
            for (DBIDIter clusterIter = cluster.getIDs().iter(); clusterIter.valid(); clusterIter.advance()) {
                int internalIndex = clusterIter.internalGetIndex() - testdata.size() - firstInternalIndex;
                if (internalIndex >= 0) {
                    // index belongs to a training instance
                    filteredTraindata.add(traindata.get(internalIndex));
                }
            }

        }
    }

    return filteredTraindata;
}

From source file:de.ugoe.cs.cpdp.dataselection.LACE2.java

License:Apache License

@Override
public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
    Instances selectedData = new Instances(testdata);
    selectedData.clear();

    LinkedList<Instances> traindataCopy = new LinkedList<>(traindataSet);
    Collections.shuffle(traindataCopy);

    CLIFF cliff = new CLIFF();
    cliff.setParameter(Double.toString(percentage));
    MORPH morph = new MORPH();
    Median median = new Median();
    double minDist = Double.MIN_VALUE;

    for (Instances traindata : traindataCopy) {
        Instances cliffedData = cliff.applyCLIFF(traindata);
        if (minDist == Double.MIN_VALUE) {
            // determine distance for leader-follower algorithm
            Instances sample;
            if (traindata.size() > 100) {
                Resample resample = new Resample();
                // sample down to exactly 100 instances
                resample.setSampleSizePercent(100.0 / traindata.size() * 100.0);
                resample.setBiasToUniformClass(0.0);
                resample.setNoReplacement(true);
                try {
                    resample.setInputFormat(traindata);
                    sample = Filter.useFilter(traindata, resample);
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            } else {
                sample = new Instances(traindata);
            }
            double[] distances = new double[sample.size()];
            for (int i = 0; i < sample.size(); i++) {
                Instance unlikeNeighbor = morph.getNearestUnlikeNeighbor(sample.get(i), sample);
                distances[i] = MathArrays.distance(WekaUtils.instanceValues(sample.get(i)),
                        WekaUtils.instanceValues(unlikeNeighbor));
            }
            minDist = median.evaluate(distances);
        }
        for (int i = 0; i < cliffedData.size(); i++) {
            Instance unlikeNeighbor = morph.getNearestUnlikeNeighbor(cliffedData.get(i), selectedData);
            if (unlikeNeighbor == null) {
                selectedData.add(cliffedData.get(i));
            } else {
                double distance = MathArrays.distance(WekaUtils.instanceValues(cliffedData.get(i)),
                        WekaUtils.instanceValues(unlikeNeighbor));
                if (distance > minDist) {
                    morph.morphInstance(cliffedData.get(i), cliffedData);
                    selectedData.add(cliffedData.get(i));
                }
            }
        }
    }
}

From source file:de.ugoe.cs.cpdp.dataselection.NeighborhoodFilter.java

License:Apache License

/**
 * <p>
 * Applies the relevancy filter after Ryu et al.
 * </p>
 *
 * @param testdata
 *            test data
 * @param traindata
 *            training data
 * @return filtered training data
 */
private Instances applyNeighborhoodFilter(Instances testdata, Instances traindata) {
    TreeSet<Integer> selectedInstances = new TreeSet<>();
    for (int i = 0; i < testdata.size(); i++) {
        double minHam = Double.MAX_VALUE;
        for (int j = 0; j < traindata.size(); j++) {
            double distance = WekaUtils.hammingDistance(testdata.get(i), traindata.get(j));
            if (distance < minHam) {
                minHam = distance;
            }
        }
        for (int j = 0; j < traindata.size(); j++) {
            double distance = WekaUtils.hammingDistance(testdata.get(i), traindata.get(j));
            if (distance <= minHam) {
                selectedInstances.add(j);
            }
        }
    }
    Instances selectedTraindata = new Instances(testdata);
    selectedTraindata.clear();
    for (Integer index : selectedInstances) {
        selectedTraindata.add(traindata.instance(index));
    }
    return selectedTraindata;
}

From source file:de.ugoe.cs.cpdp.execution.AbstractCrossProjectExperiment.java

License:Apache License

/**
 * Helper method that combines a set of Weka {@link Instances} sets into a single
 * {@link Instances} set.
 * 
 * @param traindataSet
 *            set of {@link Instances} to be combined
 * @return single {@link Instances} set
 */
public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
    Instances traindataFull = null;
    for (Instances traindata : traindataSet) {
        if (traindataFull == null) {
            traindataFull = new Instances(traindata);
        } else {
            for (int i = 0; i < traindata.numInstances(); i++) {
                traindataFull.add(traindata.instance(i));
            }
        }
    }
    return traindataFull;
}

From source file:de.ugoe.cs.cpdp.execution.ClassifierCreationExperiment.java

License:Apache License

/**
 * Executes the experiment with the steps as described in the class comment.
 * 
 * @see Runnable#run()
 */
@Override
public void run() {
    final List<SoftwareVersion> versions = new LinkedList<>();

    boolean writeHeader = true;

    for (IVersionLoader loader : config.getLoaders()) {
        versions.addAll(loader.load());
    }

    File resultsDir = new File(config.getResultsPath());
    if (!resultsDir.exists()) {
        resultsDir.mkdir();
    }

    int versionCount = 1;
    for (SoftwareVersion testVersion : versions) {

        // At first: traindata == testdata
        Instances testdata = testVersion.getInstances();
        Instances traindata = new Instances(testdata);
        List<Double> efforts = testVersion.getEfforts();

        // Give the dataset a new name
        testdata.setRelationName(testVersion.getProject());

        for (IProcessesingStrategy processor : config.getPreProcessors()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying preprocessor %s", config.getExperimentName(),
                            versionCount, versions.size(), testVersion.getProject(),
                            processor.getClass().getName()));
            processor.apply(testdata, traindata);
        }

        for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
                            config.getExperimentName(), versionCount, versions.size(), testVersion.getProject(),
                            dataselector.getClass().getName()));
            traindata = dataselector.apply(testdata, traindata);
        }

        for (IProcessesingStrategy processor : config.getPostProcessors()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
                            config.getExperimentName(), versionCount, versions.size(), testVersion.getProject(),
                            processor.getClass().getName()));
            processor.apply(testdata, traindata);
        }

        // Trainerlist for evaluation later on
        List<ITrainer> allTrainers = new LinkedList<>();

        for (ITrainingStrategy trainer : config.getTrainers()) {

            // Add trainer to list for evaluation
            allTrainers.add(trainer);

            // Train classifier
            trainer.apply(traindata);

            if (config.getSaveClassifier()) {
                // If the classifier should be saved, train it and save it
                // be careful with typecasting here!
                IWekaCompatibleTrainer trainerToSave = (IWekaCompatibleTrainer) trainer;
                // Console.println(trainerToSave.getClassifier().toString());
                try {
                    weka.core.SerializationHelper.write(resultsDir.getAbsolutePath() + "/" + trainer.getName()
                            + "-" + testVersion.getProject(), trainerToSave.getClassifier());
                } catch (Exception e) {
                    e.printStackTrace();
                }

            }
        }

        for (IEvaluationStrategy evaluator : config.getEvaluators()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying evaluator %s", config.getExperimentName(),
                            versionCount, versions.size(), testVersion.getProject(),
                            evaluator.getClass().getName()));

            if (writeHeader) {
                evaluator.setParameter(config.getResultsPath() + "/" + config.getExperimentName() + ".csv");
            }
            evaluator.apply(testdata, traindata, allTrainers, efforts, writeHeader, config.getResultStorages());
            writeHeader = false;
        }

        Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: finished", config.getExperimentName(),
                versionCount, versions.size(), testVersion.getProject()));

        versionCount++;

    }

}

From source file:de.ugoe.cs.cpdp.execution.WithinProjectOrderedSplitExperiment.java

License:Apache License

/**
 * Executes the experiment with the steps as described in the class comment.
 * 
 * @see Runnable#run()
 */
@Override
public void run() {
    final List<SoftwareVersion> versions = new LinkedList<>();

    for (IVersionLoader loader : config.getLoaders()) {
        versions.addAll(loader.load());
    }

    for (IVersionFilter filter : config.getVersionFilters()) {
        filter.apply(versions);
    }
    boolean writeHeader = true;
    int versionCount = 1;
    int testVersionCount = 0;
    int numTrainers = 0;

    for (SoftwareVersion testVersion : versions) {
        if (isVersion(testVersion, config.getTestVersionFilters())) {
            testVersionCount++;
        }
    }

    numTrainers += config.getSetWiseTrainers().size();
    numTrainers += config.getSetWiseTestdataAwareTrainers().size();
    numTrainers += config.getTrainers().size();
    numTrainers += config.getTestAwareTrainers().size();

    // sort versions
    Collections.sort(versions);

    for (SoftwareVersion testVersion : versions) {
        if (isVersion(testVersion, config.getTestVersionFilters())) {
            Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: starting",
                    config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion()));
            int numResultsAvailable = resultsAvailable(testVersion);
            if (numResultsAvailable >= numTrainers * config.getRepetitions()) {
                Console.traceln(Level.INFO,
                        String.format("[%s] [%02d/%02d] %s: results already available; skipped",
                                config.getExperimentName(), versionCount, testVersionCount,
                                testVersion.getVersion()));
                versionCount++;
                continue;
            }

            // Setup testdata and training data
            Instances testdata = testVersion.getInstances();
            List<Double> efforts = testVersion.getEfforts();

            // now split data into parts
            double percentage = 0.5; // 0.5 as default value
            String param = config.getExecutionStrategyParameters();
            if (config.getExecutionStrategyParameters() != null) {
                try {
                    percentage = Double.parseDouble(param);
                } catch (NumberFormatException e) {
                    throw new RuntimeException(
                            "invalid execution strategy parameter, must be numeric: " + param);
                }
            }
            int initialTestSize = testdata.size();
            Instances traindata = new Instances(testdata);
            // ordered split: the earlier instances become the training data, the
            // later instances remain the test data
            for (int i = initialTestSize - 1; i >= 0; i--) {
                if ((((double) i) / initialTestSize) < percentage) {
                    testdata.delete(i);
                    if (efforts != null) {
                        efforts.remove(i);
                    }
                } else {
                    traindata.delete(i);
                }
            }

            for (IProcessesingStrategy processor : config.getPreProcessors()) {
                Console.traceln(Level.FINE,
                        String.format("[%s] [%02d/%02d] %s: applying preprocessor %s",
                                config.getExperimentName(), versionCount, testVersionCount,
                                testVersion.getVersion(), processor.getClass().getName()));
                processor.apply(testdata, traindata);
            }
            for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors()) {
                Console.traceln(Level.FINE,
                        String.format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
                                config.getExperimentName(), versionCount, testVersionCount,
                                testVersion.getVersion(), dataselector.getClass().getName()));
                traindata = dataselector.apply(testdata, traindata);
            }
            for (IProcessesingStrategy processor : config.getPostProcessors()) {
                Console.traceln(Level.FINE,
                        String.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
                                config.getExperimentName(), versionCount, testVersionCount,
                                testVersion.getVersion(), processor.getClass().getName()));
                processor.apply(testdata, traindata);
            }
            for (ITrainingStrategy trainer : config.getTrainers()) {
                Console.traceln(Level.FINE,
                        String.format("[%s] [%02d/%02d] %s: applying trainer %s", config.getExperimentName(),
                                versionCount, testVersionCount, testVersion.getVersion(), trainer.getName()));
                trainer.apply(traindata);
            }
            for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
                Console.traceln(Level.FINE,
                        String.format("[%s] [%02d/%02d] %s: applying trainer %s", config.getExperimentName(),
                                versionCount, testVersionCount, testVersion.getVersion(), trainer.getName()));
                trainer.apply(testdata, traindata);
            }
            File resultsDir = new File(config.getResultsPath());
            if (!resultsDir.exists()) {
                resultsDir.mkdir();
            }
            for (IEvaluationStrategy evaluator : config.getEvaluators()) {
                Console.traceln(Level.FINE,
                        String.format("[%s] [%02d/%02d] %s: applying evaluator %s", config.getExperimentName(),
                                versionCount, testVersionCount, testVersion.getVersion(),
                                evaluator.getClass().getName()));
                List<ITrainer> allTrainers = new LinkedList<>();
                for (ITrainingStrategy trainer : config.getTrainers()) {
                    allTrainers.add(trainer);
                }
                for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
                    allTrainers.add(trainer);
                }
                if (writeHeader) {
                    evaluator.setParameter(config.getResultsPath() + "/" + config.getExperimentName() + ".csv");
                }
                evaluator.apply(testdata, traindata, allTrainers, efforts, writeHeader,
                        config.getResultStorages());
                writeHeader = false;
            }
            Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: finished",
                    config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion()));
            versionCount++;
        }
    }
}

From source file:de.ugoe.cs.cpdp.loader.DecentDataLoader.java

License:Apache License

/**
 * Loads the given decent file and transforms it from decent->arffx->arff
 * 
 * @return Instances in WEKA format
 */
@Override
public Instances load(File file) {

    // Set attributeFilter
    setAttributeFilter();

    // Register MetaModels
    try {
        registerMetaModels();
    } catch (Exception e1) {
        Console.printerrln("Metamodels cannot be registered!");
        e1.printStackTrace();
    }

    // Set location of decent and arffx Model
    String decentModelLocation = file.getAbsolutePath();
    String pathToDecentModelFolder = decentModelLocation.substring(0,
            decentModelLocation.lastIndexOf(File.separator));
    String arffxModelLocation = pathToDecentModelFolder + "/model.arffx";
    String logModelLocation = pathToDecentModelFolder + "/model.log";
    String arffLocation = pathToDecentModelFolder + "/model.arff";

    // If arff File exists, load from it!
    if (new File(arffLocation).exists()) {
        System.out.println("Loading arff File...");
        BufferedReader reader;
        Instances data = null;
        try {
            reader = new BufferedReader(new FileReader(arffLocation));
            data = new Instances(reader);
            reader.close();
        } catch (FileNotFoundException e) {
            Console.printerrln("File with path: " + arffLocation + " was not found.");
            throw new RuntimeException(e);
        } catch (IOException e) {
            Console.printerrln("File with path: " + arffLocation + " cannot be read.");
            throw new RuntimeException(e);
        }

        // Set class attribute if not set
        if (data.classIndex() == -1) {
            Attribute classAttribute = data.attribute(classAttributeName);
            data.setClass(classAttribute);
        }

        return data;
    }

    // Location of EOL Scripts
    String preprocess = "./decent/epsilon/query/preprocess.eol";
    String arffxToArffSource = "./decent/epsilon/query/addLabels.eol";

    // Set Log Properties
    System.setProperty("epsilon.logLevel", logLevel);
    System.setProperty("epsilon.logToFile", logToFile);
    System.setProperty("epsilon.logFileAvailable", "false");

    // Set decent2arffx Properties
    System.setProperty("epsilon.transformation.decent2arffx.skipSource", "false");
    System.setProperty("epsilon.transformation.decent2arffx.type", "code");

    // Preprocess Data, transform from decent2arffx
    try {
        IEolExecutableModule preProcessModule = loadModule(preprocess);
        IModel preProcessDecentModel = modelHandler.getDECENTModel(decentModelLocation, true, true);
        IModel preProcessArffxarffxModel = modelHandler.getARFFxModel(arffxModelLocation, false, true);
        preProcessModule.getContext().getModelRepository().addModel(preProcessDecentModel);
        preProcessModule.getContext().getModelRepository().addModel(preProcessArffxarffxModel);
        execute(preProcessModule, logModelLocation);
        preProcessDecentModel.dispose();
        preProcessArffxarffxModel.dispose();
        preProcessModule.reset();
    } catch (URISyntaxException e) {
        Console.printerrln("URI Syntax for decent or arffx model is wrong.");
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Transform to arff, for label and confidence attributes
    try {
        IEolExecutableModule arffxToArffModule = loadModule(arffxToArffSource);
        IModel arffxToArffArffxModel = modelHandler.getARFFxModel(arffxModelLocation, true, true);
        arffxToArffModule.getContext().getModelRepository().addModel(arffxToArffArffxModel);
        execute(arffxToArffModule, logModelLocation);
        arffxToArffArffxModel.dispose();
        // can be stored and retained alternatively
        arffxToArffModule.reset();
    } catch (URISyntaxException e) {
        Console.printerrln("URI Syntax for arffx model is wrong.");
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Unregister MetaModels, otherwise cast will fail
    HashMap<String, Object> metaModelCache = new HashMap<>();
    for (String key : EPackage.Registry.INSTANCE.keySet()) {
        metaModelCache.put(key, EPackage.Registry.INSTANCE.get(key));
    }

    for (String key : metaModelCache.keySet()) {
        EPackage.Registry.INSTANCE.remove(key);
    }

    // Workaround to generate a usable URI. An absolute path is not
    // possible, therefore we need to construct a relative path

    URL location = DecentDataLoader.class.getProtectionDomain().getCodeSource().getLocation();
    String basePath = location.getFile();

    // Location is the bin folder, so we need to delete the last 4 characters ("bin/")
    basePath = basePath.substring(0, basePath.length() - 4);
    String relativePath = new File(basePath).toURI().relativize(new File(arffxModelLocation).toURI()).getPath();

    // Load the arffx file and create WEKA Instances
    ARFFxResourceTool tool = new ARFFxResourceTool();
    Resource resource = tool.loadResourceFromXMI(relativePath, "arffx");

    Instances dataSet = null;
    for (EObject o : resource.getContents()) {
        Model m = (Model) o;
        dataSet = createWekaDataFormat(m);

        for (Instance i : m.getData()) {
            createWekaInstance(dataSet, i);
        }
    }

    // Set class attribute
    Attribute classAttribute = dataSet.attribute(classAttributeName);
    dataSet.setClass(classAttribute);

    // Save as ARFF
    save(dataSet, arffLocation);

    return dataSet;

}