Example usage of the weka.core.Instances constructor

Introduction

On this page you can find example usages of the weka.core.Instances constructor Instances(String name, ArrayList<Attribute> attInfo, int capacity).

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity)

Source Link

Document

Creates an empty set of instances.

Usage

From source file:de.ugoe.cs.cpdp.loader.AUDIChangeLoader.java

License:Apache License

/**
 * Loads change-based data from a metrics file and its companion bug file.
 * <p>
 * The path of the bug file is derived from the path of the metrics file by replacing its last
 * 14 characters with {@code repro.csv}. Both files are split at {@code ;}. The rows of the bug
 * file are ordered by their {@link EntityRevisionPair}; whenever a row belongs to the same
 * entity as the previously processed row, a candidate instance is built that contains the
 * metric deltas, the absolute metric values and a binary class that is 1 if the number of bugs
 * increased. The candidate is only added if at least one delta is greater than zero.
 *
 * @param file
 *            the metrics file
 * @return the created instances
 * @throws RuntimeException
 *             if one of the files cannot be read or a required header column is missing
 */
@Override
public Instances load(File file) {
    final String[] lines;
    try {
        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // the bug information is stored in a second file
    final String metricsPath = file.getAbsolutePath();
    final String bugPath = metricsPath.substring(0, metricsPath.length() - 14) + "repro.csv";
    final String[] linesBug;
    try {
        linesBug = FileTools.getLinesFromFile(bugPath);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // locate the revision and bug columns in the header of the bug file;
    // if a name occurs more than once, the last occurrence wins
    final String[] bugHeader = linesBug[0].split(";");
    int revisionIndex = -1;
    int bugIndex = -1;
    for (int col = 0; col < bugHeader.length; col++) {
        if ("svnrev".equals(bugHeader[col])) {
            revisionIndex = col;
        } else if ("num_bugs_trace".equals(bugHeader[col])) {
            bugIndex = col;
        }
    }
    if (revisionIndex < 0) {
        throw new RuntimeException("could not find SVN revisions");
    }
    if (bugIndex < 0) {
        throw new RuntimeException("could not find bug information");
    }

    // locate the first and last metric column in the header of the metrics file
    final String[] metricsHeader = lines[0].split(";");
    int metricsStartIndex = -1;
    int metricsEndIndex = -1;
    for (int col = 0; col < metricsHeader.length; col++) {
        if ("lm_LOC".equals(metricsHeader[col])) {
            metricsStartIndex = col;
        } else if ("h_E".equals(metricsHeader[col])) {
            metricsEndIndex = col;
        }
    }
    if (metricsStartIndex < 0) {
        throw new RuntimeException("could not find first metric, i.e., lm_LOC");
    }
    if (metricsEndIndex < 0) {
        throw new RuntimeException("could not find last metric, i.e., h_E");
    }
    final int numMetrics = metricsEndIndex - metricsStartIndex + 1;

    // map every (entity, revision) pair to its line number, sorted by the pair
    final SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>();
    for (int row = 1; row < linesBug.length; row++) {
        final String[] bugCells = linesBug[row].split(";");
        final String entity = bugCells[0];
        final int revision = Integer.parseInt(bugCells[revisionIndex]);
        entityRevisionPairs.put(new EntityRevisionPair(entity, revision), row);
    }

    // attribute layout: all deltas first, then all absolute values, then the class
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (String suffix : new String[] { "_delta", "_abs" }) {
        for (int col = metricsStartIndex; col <= metricsEndIndex; col++) {
            atts.add(new Attribute(metricsHeader[col] + suffix));
        }
    }
    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // state of the previously processed row; only updated if a row was parsed successfully
    String lastFile = null;
    double[] lastValues = null;
    int lastNumBugs = 0;
    for (Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet()) {
        final int row = entry.getValue();
        try {
            // parse the metrics and the bug count of the current row
            final String[] metricCells = lines[row].split(";");
            final String[] bugCells = linesBug[row].split(";");
            final double[] values = new double[numMetrics];
            for (int k = 0; k < numMetrics; k++) {
                values[k] = Double.parseDouble(metricCells[metricsStartIndex + k]);
            }
            final int numBugs = Integer.parseInt(bugCells[bugIndex]);

            final String entity = entry.getKey().entity;
            if (entity.equals(lastFile)) {
                // same entity as before: build the delta/absolute instance
                final double[] instanceValues = new double[2 * numMetrics + 1];
                boolean changeOccured = false;
                for (int k = 0; k < numMetrics; k++) {
                    final double delta = values[k] - lastValues[k];
                    instanceValues[k] = delta;
                    instanceValues[k + numMetrics] = values[k];
                    if (delta > 0) {
                        changeOccured = true;
                    }
                }
                if (changeOccured) {
                    // the class slot is 0 by default; mark as 1 only if bugs increased
                    if (numBugs > lastNumBugs) {
                        instanceValues[instanceValues.length - 1] = 1;
                    }
                    data.add(new DenseInstance(1.0, instanceValues));
                }
            }
            lastFile = entity;
            lastValues = values;
            lastNumBugs = numBugs;
        } catch (IllegalArgumentException e) {
            System.err.println("error in line " + row + ": " + e.getMessage());
            System.err.println("metrics line: " + lines[row]);
            System.err.println("bugs line: " + linesBug[row]);
            System.err.println("line is ignored");
        }
    }

    return data;
}

From source file:de.ugoe.cs.cpdp.loader.AUDIChangeLoader.java

License:Apache License

/**
 * Loads metric data from a semicolon-separated file and labels it with the bug information of
 * a companion file.
 * <p>
 * The path of the bug file is derived from the path of the metrics file by replacing its last
 * 14 characters with {@code repro.csv}. The first three columns (four if the fourth header
 * column is {@code project_rev}) and the last two columns of the metrics file are ignored.
 * Rows with a missing or empty metric cell are reported on standard output and skipped.
 *
 * @param file
 *            the metrics file
 * @param dummy
 *            not used by this method
 * @return the created instances with the nominal class attribute {@code bug}
 * @throws RuntimeException
 *             if one of the files cannot be read
 */
public Instances load(File file, String dummy) {
    final String[] lines;
    try {
        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // information about bugs are in another file
    String path = file.getAbsolutePath();
    path = path.substring(0, path.length() - 14) + "repro.csv";
    final String[] linesBug;
    try {
        linesBug = FileTools.getLinesFromFile(path);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // configure Instances
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();

    String[] lineSplit = lines[0].split(";");
    // ignore first three/four and last two columns
    final int offset = lineSplit[3].equals("project_rev") ? 4 : 3;
    for (int j = 0; j < lineSplit.length - (offset + 2); j++) {
        atts.add(new Attribute(lineSplit[j + offset]));
    }
    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // column of the bug file that is compared against "0" to derive the class
    final int bugColumn = (offset == 3) ? 7 : 8;

    // fetch data
    for (int i = 1; i < lines.length; i++) {
        boolean validInstance = true;
        // limit -1 keeps trailing empty cells; without it split() drops them and an empty
        // metric at the end of a row would cause an ArrayIndexOutOfBoundsException instead
        // of being detected as an invalid instance
        lineSplit = lines[i].split(";", -1);
        String[] lineSplitBug = linesBug[i].split(";");
        double[] values = new double[data.numAttributes()];
        for (int j = 0; validInstance && j < values.length - 1; j++) {
            final int column = j + offset;
            // a cell that is missing altogether is handled like an empty cell
            final String cell = column < lineSplit.length ? lineSplit[column].trim() : "";
            if (cell.isEmpty()) {
                validInstance = false;
            } else {
                values[j] = Double.parseDouble(cell);
            }
        }
        values[values.length - 1] = lineSplitBug[bugColumn].equals("0") ? 0 : 1;

        if (validInstance) {
            data.add(new DenseInstance(1.0, values));
        } else {
            System.out.println("instance " + i + " is invalid");
        }
    }
    return data;
}

From source file:de.ugoe.cs.cpdp.loader.AUDIDataLoader.java

License:Apache License

/**
 * Loads metric data from a semicolon-separated file and labels it with the bug information of
 * a companion file.
 * <p>
 * The path of the bug file is derived from the path of the metrics file by replacing its last
 * 14 characters with {@code repro.csv}. The first three columns (four if the fourth header
 * column is {@code project_rev}) and the last two columns of the metrics file are ignored.
 * Rows with a missing or empty metric cell are reported on standard output and skipped.
 *
 * @param file
 *            the metrics file
 * @return the created instances with the nominal class attribute {@code bug}
 * @throws RuntimeException
 *             if one of the files cannot be read
 */
@Override
public Instances load(File file) {
    final String[] lines;
    try {
        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // information about bugs are in another file
    String path = file.getAbsolutePath();
    path = path.substring(0, path.length() - 14) + "repro.csv";
    final String[] linesBug;
    try {
        linesBug = FileTools.getLinesFromFile(path);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // configure Instances
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();

    String[] lineSplit = lines[0].split(";");
    // ignore first three/four and last two columns
    final int offset = lineSplit[3].equals("project_rev") ? 4 : 3;
    for (int j = 0; j < lineSplit.length - (offset + 2); j++) {
        atts.add(new Attribute(lineSplit[j + offset]));
    }
    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // column of the bug file that is compared against "0" to derive the class
    final int bugColumn = (offset == 3) ? 7 : 8;

    // fetch data
    for (int i = 1; i < lines.length; i++) {
        boolean validInstance = true;
        // limit -1 keeps trailing empty cells; without it split() drops them and an empty
        // metric at the end of a row would cause an ArrayIndexOutOfBoundsException instead
        // of being detected as an invalid instance
        lineSplit = lines[i].split(";", -1);
        String[] lineSplitBug = linesBug[i].split(";");
        double[] values = new double[data.numAttributes()];
        for (int j = 0; validInstance && j < values.length - 1; j++) {
            final int column = j + offset;
            // a cell that is missing altogether is handled like an empty cell
            final String cell = column < lineSplit.length ? lineSplit[column].trim() : "";
            if (cell.isEmpty()) {
                validInstance = false;
            } else {
                values[j] = Double.parseDouble(cell);
            }
        }
        values[values.length - 1] = lineSplitBug[bugColumn].equals("0") ? 0 : 1;

        if (validInstance) {
            data.add(new DenseInstance(1.0, values));
        } else {
            System.out.println("instance " + i + " is invalid");
        }
    }
    return data;
}

From source file:de.ugoe.cs.cpdp.loader.CSVMockusDataLoader.java

License:Apache License

/**
 * Loads a comma-separated file into a set of instances.
 * <p>
 * The first two columns and the second to last column of the header are not turned into
 * attributes; all header columns in between become numeric attributes, followed by the nominal
 * class attribute {@code bug}. For each data row, the cells starting at the third column are
 * parsed as numbers and the last cell determines the class (0 if it is {@code "0"}, 1
 * otherwise).
 *
 * @param file
 *            the file to load
 * @return the created instances
 * @throws RuntimeException
 *             if the file cannot be read
 */
@Override
public Instances load(File file) {
    final String[] lines;
    try {
        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // build the attribute structure from the header line
    final String[] header = lines[0].split(",");
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (int col = 2; col < header.length - 1; col++) {
        atts.add(new Attribute(header[col]));
    }

    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // convert every remaining line into one instance
    for (int row = 1; row < lines.length; row++) {
        final String[] cells = lines[row].split(",");
        // the array is sized by the row itself: all cells but the first two
        final double[] values = new double[cells.length - 2];
        final int classPos = values.length - 1;
        for (int k = 0; k < classPos; k++) {
            values[k] = Double.parseDouble(cells[k + 2].trim());
        }
        final String label = cells[cells.length - 1].trim();
        values[classPos] = "0".equals(label) ? 0 : 1;
        data.add(new DenseInstance(1.0, values));
    }

    return data;
}

From source file:de.ugoe.cs.cpdp.loader.DecentDataLoader.java

License:Apache License

/**
 * Creates an empty Weka data set whose attributes mirror those of an arffx model.
 * <p>
 * Attributes whose name is contained in {@code attributeFilter} are left out. Label
 * attributes become nominal with the values {@code false}/{@code true}, confidence label
 * attributes become nominal with the values {@code high}/{@code low}, and all other
 * attributes become numeric.
 *
 * @param m
 *            arffx model
 * @return an empty set of instances named {@code test-dataset} with the derived attributes
 */
private Instances createWekaDataFormat(Model m) {
    final ArrayList<Attribute> wekaAttributes = new ArrayList<Attribute>();

    for (de.ugoe.cs.cpdp.decentApp.models.arffx.Attribute modelAttribute : m.getAttributes()) {
        final String name = modelAttribute.getName();

        // only attributes that are not filtered out are converted
        if (!attributeFilter.contains(name)) {
            // stays null for numeric attributes
            ArrayList<String> nominalValues = null;
            if (isLabel(name)) {
                // LABEL.* attribute
                nominalValues = new ArrayList<String>();
                nominalValues.add("false");
                nominalValues.add("true");
            } else if (isConfidenceLabel(name)) {
                // CONFIDENCE.* attribute
                nominalValues = new ArrayList<String>();
                nominalValues.add("high");
                nominalValues.add("low");
            }

            if (nominalValues != null) {
                wekaAttributes.add(new Attribute(name, nominalValues));
            } else {
                wekaAttributes.add(new Attribute(name));
            }
        }
    }

    return new Instances("test-dataset", wekaAttributes, 0);
}

From source file:de.uniheidelberg.cl.swp.mlprocess.AblationTesting.java

License:Apache License

/**
 * Builds a new Instances object that contains one instance per instance of the source, reduced
 * to the attributes of the given target structure.
 *
 * @param source The Instances object whose instances are copied.
 * @param targetStructure The attributes of the new Instances object; the value of each
 *          attribute is looked up in the source instance by the attribute's name.
 * @return A new Instances object named {@code ACResolution} with the copied instances.
 */
private Instances copyInstances(Instances source, ArrayList<Attribute> targetStructure) {
    final Instances target = new Instances("ACResolution", targetStructure, 0);

    for (int row = 0; row < source.numInstances(); row++) {
        final Instance original = source.instance(row);
        final double[] vals = new double[targetStructure.size()];

        // fill the values in the order of the target attributes
        int pos = 0;
        for (Attribute feature : targetStructure) {
            vals[pos++] = getAttributeValue(original, feature.name());
        }
        target.add(new DenseInstance(1.0, vals));
    }
    return target;
}

From source file:de.uniheidelberg.cl.swp.mlprocess.InstanceContainer.java

License:Apache License

/**
 * Takes a list of {@link AbstractFeatureExtractor} and adds the corresponding feature as a
 * WEKA Attribute to the Instances structure.
 *  /* w  ww.ja  v  a2  s  . c om*/
 * @param featureList The list of FeatureExtractors to be added.
 */
public void createAttributeStructure(List<AbstractFeatureExtractor> featureList) {
    if (this.instances != null) {
        return;
    }
    ArrayList<Attribute> wekaAttributes = new ArrayList<Attribute>(featureList.size());

    for (AbstractFeatureExtractor fe : featureList) {
        wekaAttributes.add(fe.getWekaAttribute());
    }

    /* Add the ACR-Systems as feature */
    List<String> acrFeature = new ArrayList<String>();

    acrFeature.add(JavaRapRunner.class.getCanonicalName());
    acrFeature.add(LingpipeRunner.class.getCanonicalName());
    acrFeature.add(BARTRunner.class.getCanonicalName());

    List<String> correctFalsePrediction = new ArrayList<String>();

    /* Add the possible prediction values */
    correctFalsePrediction.add("+");
    correctFalsePrediction.add("-");
    correctFalsePrediction.add("?");

    wekaAttributes.add(new Attribute("acrSystem", acrFeature));
    wekaAttributes.add(new Attribute("corretOutputBySystem", correctFalsePrediction));

    this.instances = new Instances("ACResolution", wekaAttributes, 0);
    this.instances.setClassIndex(this.instances.numAttributes() - 1);
}

From source file:de.uni_koeln.phil_fak.iv.tm.p4.classification.WekaAdapter.java

License:Open Source License

private Instances initTraininSet(Set<Document> trainingData) {
    /* Der FastVector enthlt die Merkmale: */
    FastVector structureVector = new FastVector(vectorSize + 1);
    /* Die Klasse wird in Weka auch als Merkmalsvektor dargestellt: */
    FastVector classesVector = new FastVector(this.classes.size());
    for (String c : classes) {
        /*/*from w w w.j av a  2 s .c  o  m*/
         * Die Klasse ist nicht numerisch, deshalb muessen alle mglichen
         * Werte angegeben werden:
         */
        classesVector.addElement(c);
    }
    /* An Stelle 0 unseres Gesamtvektors kommt der Klassen-Vektor: */
    structureVector.addElement(new Attribute("Ressort", classesVector));
    for (int i = 0; i < vectorSize; i++) {
        /*
         * An jeder Position unseres Merkmalsvektors haben wir ein
         * numerisches Merkmal (reprsentiert als Attribute), dessen Name
         * sein Index ist:
         */
        structureVector.addElement(new Attribute(i + "")); // Merkmal i,
                                                           // d.h. was? >
                                                           // TF-IDF
    }
    /*
     * Schliesslich erstellen wir einen Container fr unsere
     * Trainingsbeispiele, der Instanzen der beschriebenen Merkmale
     * enthalten wird:
     */
    Instances result = new Instances("InstanceStructure", structureVector, vectorSize + 1);
    /*
     * Wobei wir noch angeben muessen, an welcher Stelle der
     * Merkmalsvektoren die Klasse zu finden ist:
     */
    result.setClassIndex(0);
    return result;
}

From source file:de.uni_koeln.spinfo.classification.zoneAnalysis.classifier.WekaClassifier.java

License:Open Source License

/**
 * Creates the empty container for the training examples.
 * <p>
 * The structure consists of the nominal class attribute {@code topic} at index 0, whose values
 * are the distinct class IDs found in the training data in ascending order, followed by one
 * numeric attribute per position of the feature vector of the first training example.
 *
 * @param trainingData
 *            the training examples; every element is cast to {@link ZoneClassifyUnit}
 * @return the empty set of instances with the class index set to 0
 */
private Instances initTrainingSet(List<ClassifyUnit> trainingData) {
    // the first example determines the number of numeric features
    final int featureCount = trainingData.get(0).getFeatureVector().length;

    // collect the distinct class IDs, sorted ascending
    final Set<Integer> classIDs = new TreeSet<Integer>();
    for (ClassifyUnit unit : trainingData) {
        classIDs.add(((ZoneClassifyUnit) unit).getActualClassID());
    }

    // The class attribute is nominal, so all of its possible values have to be listed.
    final ArrayList<String> classValues = new ArrayList<String>();
    for (Integer classID : classIDs) {
        classValues.add(String.valueOf(classID));
    }

    // The structure holds the class attribute at position 0, followed by the numeric
    // features (here: tf-idf values), each of which is named after its index position.
    final ArrayList<Attribute> structure = new ArrayList<Attribute>(featureCount + 1);
    structure.add(new Attribute("topic", classValues));
    for (int position = 0; position < featureCount; position++) {
        structure.add(new Attribute(Integer.toString(position)));
    }

    // Container that will hold the instances (i.e. the training examples) of this structure.
    final Instances result = new Instances("InstanceStructure", structure, featureCount + 1);
    // State at which position of the feature vectors the class can be found.
    result.setClassIndex(0);
    return result;
}

From source file:de.upb.timok.utils.DatasetTransformationUtils.java

License:Open Source License

/**
 * Converts a list of feature vectors into a Weka data set with a single-valued class.
 * <p>
 * The first vector determines the number of numeric attributes, which are named after their
 * index. A nominal attribute {@code class} with the only value {@code normal} is appended as
 * the class attribute, and every vector is added as an instance with class value index 0.
 *
 * @param trainingSet
 *            the feature vectors; the first element is read to determine the attribute count
 * @return the data set named {@code trainingSet}
 */
public static Instances trainingSetToInstances(List<double[]> trainingSet) {
    final int featureCount = trainingSet.get(0).length;

    // one numeric attribute per feature plus the class attribute at the end
    final ArrayList<Attribute> attributes = new ArrayList<>(featureCount + 1);
    for (int idx = 0; idx < featureCount; idx++) {
        attributes.add(new Attribute(Integer.toString(idx)));
    }
    final ArrayList<String> classLabels = new ArrayList<>();
    classLabels.add("normal");
    final Attribute classAttribute = new Attribute("class", classLabels);
    attributes.add(classAttribute);

    final Instances result = new Instances("trainingSet", attributes, trainingSet.size());
    result.setClass(classAttribute);
    result.setClassIndex(attributes.size() - 1);

    for (final double[] features : trainingSet) {
        // a fresh array is zero-initialized, so the additional last slot (the class
        // value) already holds 0
        final double[] withClass = new double[features.length + 1];
        System.arraycopy(features, 0, withClass, 0, features.length);
        final Instance row = new DenseInstance(1, withClass);
        row.setDataset(result);
        result.add(row);
    }
    return result;
}