Example usage of the Instances constructor of weka.core.Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find usage examples for the weka.core.Instances constructor Instances(String, ArrayList<Attribute>, int).

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity) 

Source Link

Document

Creates an empty set of instances.

Usage

From source file:de.ugoe.cs.cpdp.loader.AUDIChangeLoader.java

License:Apache License

/**
 * Builds a Weka data set describing metric changes between consecutive revisions of an entity.
 *
 * <p>The metrics are read from {@code file}; the bug information is read from a second file whose
 * path is obtained by replacing the last 14 characters of the absolute path of {@code file} with
 * {@code repro.csv}. Both files are split on {@code ;} and their first line is used as header.
 * Rows are visited in the sort order of their {@code EntityRevisionPair} keys. For every row whose
 * entity equals the entity of the previously processed row, an instance is added if at least one
 * metric increased. Such an instance holds the metric deltas, the absolute metric values and a
 * class value of 1 if the bug count increased, 0 otherwise.
 *
 * @param file
 *            the metrics file
 * @return the created instances with the nominal attribute {@code bug} as class attribute
 * @throws RuntimeException
 *             if one of the files cannot be read or a required header column is missing
 */
@Override
public Instances load(File file) {
    final String metricsPath = file.getAbsolutePath();

    final String[] lines;
    try {
        lines = FileTools.getLinesFromFile(metricsPath);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // the bug information is stored in a sibling file
    final String bugPath = metricsPath.substring(0, metricsPath.length() - 14) + "repro.csv";
    final String[] linesBug;
    try {
        linesBug = FileTools.getLinesFromFile(bugPath);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // locate the revision and bug count columns in the header of the bug file
    final String[] bugHeader = linesBug[0].split(";");
    int revisionIndex = -1;
    int bugIndex = -1;
    for (int col = 0; col < bugHeader.length; col++) {
        if ("svnrev".equals(bugHeader[col])) {
            revisionIndex = col;
        } else if ("num_bugs_trace".equals(bugHeader[col])) {
            bugIndex = col;
        }
    }
    if (revisionIndex < 0) {
        throw new RuntimeException("could not find SVN revisions");
    }
    if (bugIndex < 0) {
        throw new RuntimeException("could not find bug information");
    }

    // locate the first and last metric column in the header of the metrics file
    final String[] metricsHeader = lines[0].split(";");
    int metricsStartIndex = -1;
    int metricsEndIndex = -1;
    for (int col = 0; col < metricsHeader.length; col++) {
        if ("lm_LOC".equals(metricsHeader[col])) {
            metricsStartIndex = col;
        } else if ("h_E".equals(metricsHeader[col])) {
            metricsEndIndex = col;
        }
    }
    if (metricsStartIndex < 0) {
        throw new RuntimeException("could not find first metric, i.e., lm_LOC");
    }
    if (metricsEndIndex < 0) {
        throw new RuntimeException("could not find last metric, i.e., h_E");
    }
    final int numMetrics = metricsEndIndex - metricsStartIndex + 1;

    // map every (entity, revision) pair to its row; the sorted map defines the visiting order
    final SortedMap<EntityRevisionPair, Integer> rowByRevision = new TreeMap<>();
    for (int row = 1; row < linesBug.length; row++) {
        final String[] bugCells = linesBug[row].split(";");
        final String entity = bugCells[0];
        final int revision = Integer.parseInt(bugCells[revisionIndex]);
        rowByRevision.put(new EntityRevisionPair(entity, revision), row);
    }

    // attribute layout: all deltas, then all absolute values, then the class
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (String suffix : new String[] { "_delta", "_abs" }) {
        for (int col = metricsStartIndex; col <= metricsEndIndex; col++) {
            atts.add(new Attribute(metricsHeader[col] + suffix));
        }
    }
    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // state of the previously processed row; only updated when a row was parsed completely
    String previousEntity = null;
    double[] previousMetrics = null;
    int previousBugCount = 0;
    for (Entry<EntityRevisionPair, Integer> pair : rowByRevision.entrySet()) {
        final int rowIndex = pair.getValue();
        try {
            // parse the metrics and the bug count of the current row
            final String[] metricCells = lines[rowIndex].split(";");
            final String[] bugCells = linesBug[rowIndex].split(";");
            final double[] currentMetrics = new double[numMetrics];
            for (int m = 0; m < numMetrics; m++) {
                currentMetrics[m] = Double.parseDouble(metricCells[metricsStartIndex + m]);
            }
            final int bugCount = Integer.parseInt(bugCells[bugIndex]);

            // an instance is only possible if the previous row belongs to the same entity
            final String currentEntity = pair.getKey().entity;
            if (currentEntity.equals(previousEntity)) {
                final double[] instanceValues = new double[2 * numMetrics + 1];
                boolean anyIncrease = false;
                for (int m = 0; m < numMetrics; m++) {
                    final double delta = currentMetrics[m] - previousMetrics[m];
                    instanceValues[m] = delta;
                    instanceValues[m + numMetrics] = currentMetrics[m];
                    if (delta > 0) {
                        anyIncrease = true;
                    }
                }
                if (anyIncrease) {
                    instanceValues[2 * numMetrics] = bugCount > previousBugCount ? 1 : 0;
                    data.add(new DenseInstance(1.0, instanceValues));
                }
            }
            previousEntity = currentEntity;
            previousMetrics = currentMetrics;
            previousBugCount = bugCount;
        } catch (IllegalArgumentException e) {
            System.err.println("error in line " + rowIndex + ": " + e.getMessage());
            System.err.println("metrics line: " + lines[rowIndex]);
            System.err.println("bugs line: " + linesBug[rowIndex]);
            System.err.println("line is ignored");
        }
    }

    return data;
}

From source file:de.ugoe.cs.cpdp.loader.AUDIChangeLoader.java

License:Apache License

/**
 * Loads a semicolon-separated metrics file and the matching bug file into Weka instances.
 *
 * <p>The bug file path is obtained by replacing the last 14 characters of the absolute path of
 * {@code file} with {@code repro.csv}. The first three columns of the metrics file (four if the
 * fourth header cell is {@code project_rev}) and its last two columns are ignored; all columns in
 * between become numeric attributes. The class value is 0 if the bug column of the matching row
 * of the bug file is {@code "0"}, 1 otherwise. Rows with an empty or missing metric cell are
 * reported on standard output and skipped.
 *
 * @param file
 *            the metrics file
 * @param dummy
 *            not used by this method
 * @return the loaded instances with the nominal attribute {@code bug} as class attribute
 * @throws RuntimeException
 *             if one of the files cannot be read
 */
public Instances load(File file, String dummy) {
    final String[] lines;
    try {
        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // information about bugs is in another file
    String path = file.getAbsolutePath();
    path = path.substring(0, path.length() - 14) + "repro.csv";
    final String[] linesBug;
    try {
        linesBug = FileTools.getLinesFromFile(path);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // configure Instances
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();

    String[] lineSplit = lines[0].split(";");
    // ignore first three/four and last two columns
    final int offset = lineSplit[3].equals("project_rev") ? 4 : 3;
    // the bug column is column 7 of the bug file for offset 3 and column 8 for offset 4
    final int bugColumn = offset + 4;
    for (int j = 0; j < lineSplit.length - (offset + 2); j++) {
        atts.add(new Attribute(lineSplit[j + offset]));
    }
    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // fetch data
    for (int i = 1; i < lines.length; i++) {
        boolean validInstance = true;
        lineSplit = lines[i].split(";");
        String[] lineSplitBug = linesBug[i].split(";");
        double[] values = new double[data.numAttributes()];
        for (int j = 0; validInstance && j < values.length - 1; j++) {
            final int column = j + offset;
            // String.split drops trailing empty cells, so a row that ends with empty metric
            // cells is shorter than the header; treat the missing cells like empty ones
            final String cell = column < lineSplit.length ? lineSplit[column].trim() : "";
            if (cell.isEmpty()) {
                validInstance = false;
            } else {
                values[j] = Double.parseDouble(cell);
            }
        }
        values[values.length - 1] = lineSplitBug[bugColumn].equals("0") ? 0 : 1;

        if (validInstance) {
            data.add(new DenseInstance(1.0, values));
        } else {
            System.out.println("instance " + i + " is invalid");
        }
    }
    return data;
}

From source file:de.ugoe.cs.cpdp.loader.AUDIDataLoader.java

License:Apache License

/**
 * Loads a semicolon-separated metrics file and the matching bug file into Weka instances.
 *
 * <p>The bug file path is obtained by replacing the last 14 characters of the absolute path of
 * {@code file} with {@code repro.csv}. The first three columns of the metrics file (four if the
 * fourth header cell is {@code project_rev}) and its last two columns are ignored; all columns in
 * between become numeric attributes. The class value is 0 if the bug column of the matching row
 * of the bug file is {@code "0"}, 1 otherwise. Rows with an empty or missing metric cell are
 * reported on standard output and skipped.
 *
 * @param file
 *            the metrics file
 * @return the loaded instances with the nominal attribute {@code bug} as class attribute
 * @throws RuntimeException
 *             if one of the files cannot be read
 */
@Override
public Instances load(File file) {
    final String[] lines;
    try {
        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // information about bugs is in another file
    String path = file.getAbsolutePath();
    path = path.substring(0, path.length() - 14) + "repro.csv";
    final String[] linesBug;
    try {
        linesBug = FileTools.getLinesFromFile(path);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // configure Instances
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();

    String[] lineSplit = lines[0].split(";");
    // ignore first three/four and last two columns
    final int offset = lineSplit[3].equals("project_rev") ? 4 : 3;
    // the bug column is column 7 of the bug file for offset 3 and column 8 for offset 4
    final int bugColumn = offset + 4;
    for (int j = 0; j < lineSplit.length - (offset + 2); j++) {
        atts.add(new Attribute(lineSplit[j + offset]));
    }
    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // fetch data
    for (int i = 1; i < lines.length; i++) {
        boolean validInstance = true;
        lineSplit = lines[i].split(";");
        String[] lineSplitBug = linesBug[i].split(";");
        double[] values = new double[data.numAttributes()];
        for (int j = 0; validInstance && j < values.length - 1; j++) {
            final int column = j + offset;
            // String.split drops trailing empty cells, so a row that ends with empty metric
            // cells is shorter than the header; treat the missing cells like empty ones
            final String cell = column < lineSplit.length ? lineSplit[column].trim() : "";
            if (cell.isEmpty()) {
                validInstance = false;
            } else {
                values[j] = Double.parseDouble(cell);
            }
        }
        values[values.length - 1] = lineSplitBug[bugColumn].equals("0") ? 0 : 1;

        if (validInstance) {
            data.add(new DenseInstance(1.0, values));
        } else {
            System.out.println("instance " + i + " is invalid");
        }
    }
    return data;
}

From source file:de.ugoe.cs.cpdp.loader.CSVMockusDataLoader.java

License:Apache License

/**
 * Loads a comma-separated data file into Weka instances.
 *
 * <p>The first line is used as header. The first two columns and the last column of the header
 * are not turned into numeric attributes; the columns in between are. In each data row, the
 * values starting at the third column are parsed as numbers and the last column determines the
 * class: 0 if it is {@code "0"} after trimming, 1 otherwise.
 *
 * @param file
 *            the file to load
 * @return the loaded instances with the nominal attribute {@code bug} as class attribute
 * @throws RuntimeException
 *             if the file cannot be read
 */
@Override
public Instances load(File file) {
    final String[] lines;
    try {
        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // attribute structure: numeric attributes from the header, followed by the class
    final String[] header = lines[0].split(",");
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (int col = 2; col < header.length - 1; col++) {
        atts.add(new Attribute(header[col]));
    }

    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // one instance per data row
    for (int row = 1; row < lines.length; row++) {
        final String[] cells = lines[row].split(",");
        final double[] rowValues = new double[cells.length - 2];
        final int classPos = rowValues.length - 1;
        for (int k = 0; k < classPos; k++) {
            rowValues[k] = Double.parseDouble(cells[k + 2].trim());
        }
        final String label = cells[cells.length - 1].trim();
        rowValues[classPos] = label.equals("0") ? 0 : 1;
        data.add(new DenseInstance(1.0, rowValues));
    }

    return data;
}

From source file:de.ugoe.cs.cpdp.loader.DecentDataLoader.java

License:Apache License

/**
 * Derives an empty Weka data set (attribute structure only) from an arffx model.
 *
 * <p>Attributes whose name is contained in {@code attributeFilter} are left out. Label attributes
 * become nominal with the values {@code false} and {@code true}, confidence label attributes
 * become nominal with the values {@code high} and {@code low}, and every other attribute becomes
 * numeric.
 *
 * @param m
 *            arffx model
 * @return empty instances named {@code test-dataset} that contain the converted attributes
 */
private Instances createWekaDataFormat(Model m) {
    final ArrayList<Attribute> wekaAttributes = new ArrayList<Attribute>();

    for (de.ugoe.cs.cpdp.decentApp.models.arffx.Attribute modelAttribute : m.getAttributes()) {
        final String name = modelAttribute.getName();

        if (!attributeFilter.contains(name)) {
            // stays null for numeric attributes
            ArrayList<String> nominalValues = null;

            if (isLabel(name)) {
                // LABEL.* attribute, i.e., a class attribute
                nominalValues = new ArrayList<String>();
                nominalValues.add("false");
                nominalValues.add("true");
            } else if (isConfidenceLabel(name)) {
                // CONFIDENCE.* attribute
                nominalValues = new ArrayList<String>();
                nominalValues.add("high");
                nominalValues.add("low");
            }

            if (nominalValues == null) {
                wekaAttributes.add(new Attribute(name));
            } else {
                wekaAttributes.add(new Attribute(name, nominalValues));
            }
        }
    }

    return new Instances("test-dataset", wekaAttributes, 0);
}

From source file:de.uniheidelberg.cl.swp.mlprocess.AblationTesting.java

License:Apache License

/**
 * Creates a new Instances object that holds, for every instance of the source, only the values
 * of the attributes listed in the target structure.
 *
 * @param source The Instances object whose Instance objects are copied.
 * @param targetStructure The attributes of the new Instances object; the values are looked up
 *          in the source by attribute name.
 * @return An Instances object named "ACResolution" with one Instance per source Instance.
 */
private Instances copyInstances(Instances source, ArrayList<Attribute> targetStructure) {
    final Instances copy = new Instances("ACResolution", targetStructure, 0);
    final int width = targetStructure.size();

    for (int row = 0; row < source.numInstances(); row++) {
        final Instance original = source.instance(row);
        final double[] projected = new double[width];

        int col = 0;
        for (Attribute attribute : targetStructure) {
            projected[col] = getAttributeValue(original, attribute.name());
            col++;
        }
        copy.add(new DenseInstance(1.0, projected));
    }
    return copy;
}

From source file:de.uniheidelberg.cl.swp.mlprocess.InstanceContainer.java

License:Apache License

/**
 * Takes a list of {@link AbstractFeatureExtractor} and adds the corresponding feature as a
 * WEKA Attribute to the Instances structure.
 *  /* w  ww.ja  v  a2  s  . c om*/
 * @param featureList The list of FeatureExtractors to be added.
 */
public void createAttributeStructure(List<AbstractFeatureExtractor> featureList) {
    if (this.instances != null) {
        return;
    }
    ArrayList<Attribute> wekaAttributes = new ArrayList<Attribute>(featureList.size());

    for (AbstractFeatureExtractor fe : featureList) {
        wekaAttributes.add(fe.getWekaAttribute());
    }

    /* Add the ACR-Systems as feature */
    List<String> acrFeature = new ArrayList<String>();

    acrFeature.add(JavaRapRunner.class.getCanonicalName());
    acrFeature.add(LingpipeRunner.class.getCanonicalName());
    acrFeature.add(BARTRunner.class.getCanonicalName());

    List<String> correctFalsePrediction = new ArrayList<String>();

    /* Add the possible prediction values */
    correctFalsePrediction.add("+");
    correctFalsePrediction.add("-");
    correctFalsePrediction.add("?");

    wekaAttributes.add(new Attribute("acrSystem", acrFeature));
    wekaAttributes.add(new Attribute("corretOutputBySystem", correctFalsePrediction));

    this.instances = new Instances("ACResolution", wekaAttributes, 0);
    this.instances.setClassIndex(this.instances.numAttributes() - 1);
}

From source file:de.uni_koeln.phil_fak.iv.tm.p4.classification.WekaAdapter.java

License:Open Source License

private Instances initTraininSet(Set<Document> trainingData) {
    /* Der FastVector enthlt die Merkmale: */
    FastVector structureVector = new FastVector(vectorSize + 1);
    /* Die Klasse wird in Weka auch als Merkmalsvektor dargestellt: */
    FastVector classesVector = new FastVector(this.classes.size());
    for (String c : classes) {
        /*/*from w w w.j av a  2 s .c  o  m*/
         * Die Klasse ist nicht numerisch, deshalb muessen alle mglichen
         * Werte angegeben werden:
         */
        classesVector.addElement(c);
    }
    /* An Stelle 0 unseres Gesamtvektors kommt der Klassen-Vektor: */
    structureVector.addElement(new Attribute("Ressort", classesVector));
    for (int i = 0; i < vectorSize; i++) {
        /*
         * An jeder Position unseres Merkmalsvektors haben wir ein
         * numerisches Merkmal (reprsentiert als Attribute), dessen Name
         * sein Index ist:
         */
        structureVector.addElement(new Attribute(i + "")); // Merkmal i,
                                                           // d.h. was? >
                                                           // TF-IDF
    }
    /*
     * Schliesslich erstellen wir einen Container fr unsere
     * Trainingsbeispiele, der Instanzen der beschriebenen Merkmale
     * enthalten wird:
     */
    Instances result = new Instances("InstanceStructure", structureVector, vectorSize + 1);
    /*
     * Wobei wir noch angeben muessen, an welcher Stelle der
     * Merkmalsvektoren die Klasse zu finden ist:
     */
    result.setClassIndex(0);
    return result;
}

From source file:de.uni_koeln.spinfo.classification.zoneAnalysis.classifier.WekaClassifier.java

License:Open Source License

/**
 * Creates the empty Weka container that describes the structure of the training instances: a
 * nominal class attribute named "topic" at index 0, followed by one numeric attribute per entry
 * of the feature vector of the first training unit.
 *
 * @param trainingData
 *            the training units; the first one defines the length of the feature vector and
 *            all of them contribute their actual class ID to the class values
 * @return the empty instances with the class index set to 0
 */
private Instances initTrainingSet(List<ClassifyUnit> trainingData) {

    final int vectorSize = trainingData.get(0).getFeatureVector().length;

    /* Collect the distinct class IDs in ascending order. */
    final Set<Integer> classIDs = new TreeSet<Integer>();
    for (ClassifyUnit unit : trainingData) {
        classIDs.add(((ZoneClassifyUnit) unit).getActualClassID());
    }

    /*
     * The class attribute is not numeric, therefore all of its possible values have to be listed.
     */
    final ArrayList<String> classValues = new ArrayList<String>();
    for (Integer classID : classIDs) {
        classValues.add(String.valueOf(classID));
    }

    /*
     * The structure consists of the class attribute at position 0 and the numeric features
     * (here: tf-idf values), each of which is simply named after its index.
     */
    final ArrayList<Attribute> structure = new ArrayList<Attribute>(vectorSize + 1);
    structure.add(new Attribute("topic", classValues));
    for (int feature = 0; feature < vectorSize; feature++) {
        structure.add(new Attribute(Integer.toString(feature)));
    }

    /*
     * Container for instances of the structure described above, i.e., for the training
     * examples. The position of the class within the feature vectors has to be specified.
     */
    final Instances result = new Instances("InstanceStructure", structure, vectorSize + 1);
    result.setClassIndex(0);
    return result;
}

From source file:de.upb.timok.utils.DatasetTransformationUtils.java

License:Open Source License

/**
 * Converts a list of feature vectors into Weka instances.
 *
 * <p>The first vector defines the number of numeric attributes, which are named after their
 * index. A nominal attribute named "class" with the single value "normal" is appended and used as
 * class attribute. Every vector is extended by one trailing element with the value 0 before it is
 * added as instance.
 *
 * @param trainingSet
 *            the feature vectors; must contain at least one element
 * @return the instances named "trainingSet"
 */
public static Instances trainingSetToInstances(List<double[]> trainingSet) {
    final int featureCount = trainingSet.get(0).length;

    // numeric attributes first, the nominal class attribute last
    final ArrayList<Attribute> attributes = new ArrayList<>(featureCount + 1);
    for (int index = 0; index < featureCount; index++) {
        attributes.add(new Attribute(Integer.toString(index)));
    }
    final ArrayList<String> classLabels = new ArrayList<>();
    classLabels.add("normal");
    final Attribute classAttribute = new Attribute("class", classLabels);
    attributes.add(classAttribute);

    final Instances result = new Instances("trainingSet", attributes, trainingSet.size());
    result.setClass(classAttribute);
    result.setClassIndex(attributes.size() - 1);

    for (final double[] features : trainingSet) {
        // one additional slot for the class value
        final double[] withClass = new double[features.length + 1];
        System.arraycopy(features, 0, withClass, 0, features.length);
        withClass[features.length] = 0;

        final Instance row = new DenseInstance(1, withClass);
        row.setDataset(result);
        result.add(row);
    }
    return result;
}