Example usage for weka.core Instances add

List of usage examples for weka.core Instances add

Introduction

In this page you can find the example usage for weka.core Instances add.

Prototype

@Override
public boolean add(Instance instance) 

Source Link

Document

Adds one instance to the end of the set.

Usage

From source file:reactivetechnologies.sentigrade.dto.RequestData.java

License:Apache License

/**
 * Builds a 2-attribute text dataset (class attribute last) from the tuples
 * held by this request. Tuples with an empty class label or null text are
 * skipped.
 * @return the populated {@link Instances} set
 */
public Instances toInstances() {
    Assert.notEmpty(getDataSet(), "'dataSet' is empty or null");
    final Instances dataset = getStructure();
    for (Tuple tuple : getDataSet()) {
        // Only keep tuples that carry both a class label and text.
        boolean usable = !StringUtils.isEmpty(tuple.textClass) && tuple.text != null;
        if (usable) {
            dataset.add(buildInstance(dataset, tuple));
        }
    }
    return dataset;
}

From source file:reactivetechnologies.sentigrade.dto.VectorRequestData.java

License:Apache License

/**
 * Transforms the tuples of this request into a vectorized Weka
 * {@link Instances} set. Instance construction is delegated to the
 * analyzer's {@link BuildInstancesDelegate}; tuples without a class label
 * or text are skipped. Progress is logged roughly every 10%.
 *
 * @return the populated dataset
 * @throws OperationFailedUnexpectedly if interrupted while polling built instances
 */
@Override
public Instances toInstances() {
    Assert.notEmpty(getDataSet(), "'dataSet' is empty or null");
    final Instances data = getStructure();
    BuildInstancesDelegate builder = analyzer.newInstancesBuilder();
    int count = 0;
    log.info("Start transforming to vector. This may take some time ..");
    long start = System.currentTimeMillis();
    for (Tuple t : getDataSet()) {
        if (StringUtils.isEmpty(t.textClass) || t.text == null)
            continue;

        builder.submitInstance(data, t);
        count++;
    }
    // Report progress every 10% of the submitted tuples. Guard against
    // count < 10: pct would be 0 and `i % pct` would throw
    // ArithmeticException (division by zero) on the second iteration.
    int pct = count / 10;
    for (int i = 0; i < count; i++) {
        try {
            data.add(builder.pollInstance());
            if (pct > 0 && i > 0 && i % pct == 0)
                log.info("Processed " + (10 * (i / pct)) + "% ..");

        } catch (InterruptedException e) {
            // Preserve the interrupt status before surfacing the failure.
            Thread.currentThread().interrupt();
            throw new OperationFailedUnexpectedly(e);
        }
    }
    long time = System.currentTimeMillis() - start;
    log.info("End transformation. Time taken: " + ConfigUtil.toTimeElapsedString(time));
    return data;
}

From source file:Reader.Classifyer.java

/**
 * Converts a list of letter images into a Weka training set.
 * Each block of 24 consecutive images receives the same class value.
 * @param images the images to convert
 * @return the resulting training instances
 */
private static Instances ImageListToInstances(List<BufferedImage> images) {
    // Nominal letter values (built but only used for its side effects here,
    // mirroring the original code).
    FastVector letters = new FastVector();
    for (String letter : letterVal) {
        letters.addElement(letter);
    }

    // One attribute per pixel feature plus one for the class.
    FastVector attributes = new FastVector(ATT_COUNT + 1);
    for (Integer idx = 0; idx < ATT_COUNT + 1; idx++) {
        attributes.addElement(new Attribute(idx.toString()));
    }

    Instances trainingData = new Instances("letters", attributes, 6500);
    trainingData.setClassIndex(ATT_COUNT);

    int imageIndex = 0;
    for (BufferedImage image : images) {
        Instance converted = imageToInstance(image);
        try {
            // Integer division: every 24 images map to one class value.
            converted.setValue(ATT_COUNT, imageIndex++ / 24);
        } catch (Exception ex) {
            System.out.println(ex + " Thrown in Classifying");
        }
        trainingData.add(converted);
    }

    return trainingData;
}

From source file:se.de.hu_berlin.informatik.faultlocalizer.machinelearn.WekaFaultLocalizer.java

License:Open Source License

/**
 * Localizes faults via machine learning: trains a classifier on the
 * per-trace involvement matrix (one nominal t/f attribute per node plus a
 * t/f "success" class attribute), then for each node predicts the class
 * distribution of a synthetic trace in which only that node is involved.
 * The node's suspiciousness is the predicted probability of the second
 * class value ("f", i.e. a failing trace).
 *
 * @param localizer provides the nodes and execution traces
 * @param strategy computation strategy (not referenced by this implementation)
 * @return ranking of nodes by predicted failure probability
 * @throws RuntimeException wrapping any exception thrown by Weka
 */
@Override
public SBFLRanking<T> localize(final ILocalizerCache<T> localizer, ComputationStrategies strategy) {

    // == 1. Create Weka training instance

    final List<INode<T>> nodes = new ArrayList<>(localizer.getNodes());

    // nominal true/false values; "t" has value index 0, "f" index 1
    final List<String> tf = new ArrayList<>();
    tf.add("t");
    tf.add("f");

    // create an attribute for each component
    final Map<INode<T>, Attribute> attributeMap = new HashMap<>();
    final ArrayList<Attribute> attributeList = new ArrayList<>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> node : nodes) {
        final Attribute attribute = new Attribute(node.toString(), tf);
        attributeList.add(attribute);
        attributeMap.put(node, attribute);
    }

    // create class attribute (trace success); last attribute is the class
    final Attribute successAttribute = new Attribute("success", tf);
    attributeList.add(successAttribute);

    // create weka training instance
    final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1);
    trainingSet.setClassIndex(attributeList.size() - 1);

    // == 2. add traces to training set

    // add an instance for each trace: node involvement as t/f, plus success flag
    for (final ITrace<T> trace : localizer.getTraces()) {
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f");
        }
        instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(instance);
    }

    // == 3. use prediction to localize faults

    // build classifier
    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions,
                trainingSet);
        final SBFLRanking<T> ranking = new SBFLRanking<>();

        Log.out(this, "begin classifying");
        int classified = 0;

        // One reusable instance: all involvements "f", class "f"; each loop
        // iteration flips a single node to "t" and back again.
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), "f");
        }
        instance.setValue(successAttribute, "f");

        for (final INode<T> node : nodes) {
            classified++;
            if (classified % 1000 == 0) {
                Log.out(this, String.format("Classified %d nodes.", classified));
            }

            // contain only the current node in the network
            instance.setValue(attributeMap.get(node), "t");

            // predict with which probability this setup leads to a failing network;
            // distribution[1] is the probability of class value "f"
            final double[] distribution = classifier.distributionForInstance(instance);
            ranking.add(node, distribution[1]);

            // reset involvement for node
            instance.setValue(attributeMap.get(node), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}

From source file:se.de.hu_berlin.informatik.stardust.localizer.machinelearn.WekaFaultLocalizer.java

License:Open Source License

/**
 * Localizes faults via machine learning: trains a classifier on the
 * per-trace involvement matrix of the spectra (one nominal t/f attribute
 * per node plus a t/f "success" class attribute), then for each node
 * predicts the class distribution of a synthetic trace in which only that
 * node is involved. The node's suspiciousness is the predicted probability
 * of the second class value ("f", i.e. a failing trace).
 *
 * @param spectra provides the nodes and execution traces
 * @return ranking of nodes by predicted failure probability
 * @throws RuntimeException wrapping any exception thrown by Weka
 */
@Override
public SBFLRanking<T> localize(final ISpectra<T> spectra) {

    // == 1. Create Weka training instance

    final List<INode<T>> nodes = new ArrayList<>(spectra.getNodes());

    // nominal true/false values; "t" has value index 0, "f" index 1
    final List<String> tf = new ArrayList<String>();
    tf.add("t");
    tf.add("f");

    // create an attribute for each component
    final Map<INode<T>, Attribute> attributeMap = new HashMap<INode<T>, Attribute>();
    final ArrayList<Attribute> attributeList = new ArrayList<Attribute>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> node : nodes) {
        final Attribute attribute = new Attribute(node.toString(), tf);
        attributeList.add(attribute);
        attributeMap.put(node, attribute);
    }

    // create class attribute (trace success); last attribute is the class
    final Attribute successAttribute = new Attribute("success", tf);
    attributeList.add(successAttribute);

    // create weka training instance
    final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1);
    trainingSet.setClassIndex(attributeList.size() - 1);

    // == 2. add traces to training set

    // add an instance for each trace: node involvement as t/f, plus success flag
    for (final ITrace<T> trace : spectra.getTraces()) {
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f");
        }
        instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(instance);
    }

    // == 3. use prediction to localize faults

    // build classifier
    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions,
                trainingSet);
        final SBFLRanking<T> ranking = new SBFLRanking<>();

        Log.out(this, "begin classifying");
        int classified = 0;

        // One reusable instance: all involvements "f", class "f"; each loop
        // iteration flips a single node to "t" and back again.
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), "f");
        }
        instance.setValue(successAttribute, "f");

        for (final INode<T> node : nodes) {
            classified++;
            if (classified % 1000 == 0) {
                Log.out(this, String.format("Classified %d nodes.", classified));
            }

            // contain only the current node in the network
            instance.setValue(attributeMap.get(node), "t");

            // predict with which probability this setup leads to a failing network;
            // distribution[1] is the probability of class value "f"
            final double[] distribution = classifier.distributionForInstance(instance);
            ranking.add(node, distribution[1]);

            // reset involvement for node
            instance.setValue(attributeMap.get(node), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}

From source file:semana07.IrisKnn.java

/**
 * Loads the iris ARFF dataset, trains a 1-NN and a 3-NN classifier on a
 * 2/3 training split, classifies the remaining third, and writes the real
 * class plus both predictions to the console and to iris.csv.
 *
 * @param args unused
 * @throws Exception if loading, training, classification or I/O fails
 */
public static void main(String[] args) throws FileNotFoundException, IOException, Exception {

    // == Training data ==

    // Load the dataset; try-with-resources closes the reader (the original
    // code leaked it).
    Instances iris;
    try (FileReader baseIris = new FileReader("iris.arff")) {
        iris = new Instances(baseIris);
    }

    // The class attribute is the 5th column (index 4).
    iris.setClassIndex(4);

    // Shuffle, then split into 2/3 training and 1/3 test folds.
    iris = iris.resample(new Debug.Random());

    Instances irisTreino = iris.trainCV(3, 0);
    Instances irisTeste = iris.testCV(3, 0);

    // == Unknown example ==

    // 5.9,3.0,5.1,1.8,Iris-virginica — class value deliberately left unset
    Instance irisInst = new DenseInstance(iris.numAttributes());
    irisInst.setDataset(iris);
    irisInst.setValue(0, 5.9);
    irisInst.setValue(1, 3.0);
    irisInst.setValue(2, 5.1);
    irisInst.setValue(3, 1.8);

    // == Classifiers ==

    // Nearest neighbour (k = 1)
    IBk vizinhoIris = new IBk();

    // k-nearest neighbours (k = 3)
    IBk knnIris = new IBk(3);

    vizinhoIris.buildClassifier(irisTreino);
    knnIris.buildClassifier(irisTreino);

    // Write results to iris.csv; try-with-resources guarantees the file is
    // flushed and closed even if classification throws mid-loop.
    try (FileWriter writer = new FileWriter("iris.csv")) {

        // CSV header
        writer.append("Classe Real;Resultado NN;Resultado kNN");
        writer.append(System.lineSeparator());

        // Console header
        System.out.println("Classe Real;Resultado NN;Resultado kNN");
        for (int i = 0; i <= irisTeste.numInstances() - 1; i++) {

            Instance testeIris = irisTeste.instance(i);

            // Real class label, to console and file
            System.out.print(testeIris.stringValue(4) + ";");
            writer.append(testeIris.stringValue(4) + ";");

            // Hide the class so the classifiers cannot see it
            testeIris.setClassMissing();

            // NN prediction
            double respostaVizinho = vizinhoIris.classifyInstance(testeIris);
            testeIris.setValue(4, respostaVizinho);
            String stringVizinho = testeIris.stringValue(4);

            // kNN prediction
            double respostaKnn = knnIris.classifyInstance(testeIris);
            testeIris.setValue(4, respostaKnn);
            String stringKnn = testeIris.stringValue(4);

            // NOTE(review): this re-adds the same unclassified example on every
            // iteration; it looks unintentional (iris is not used afterwards)
            // but is kept to preserve the original behaviour.
            iris.add(irisInst);

            // Predictions to file
            writer.append(stringVizinho + ";");
            writer.append(stringKnn + ";");
            writer.append(System.lineSeparator());

            // Predictions to console
            System.out.print(respostaVizinho + ";");
            System.out.print(respostaKnn + ";");
            System.out.println(testeIris.stringValue(4));
        }

        writer.flush();
    }

}

From source file:sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java

License:Open Source License

/**
 * Builds the classifier ensemble for a multi-class problem. A ZeroR model
 * is always trained first. With a single class no classifiers are built;
 * with two classes one copy of the base classifier is trained on the
 * filtered instances; otherwise a one-vs-rest classifier is trained per
 * class via {@code genInstances}.
 *
 * @param p_Instances the training instances
 * @throws Exception if no base classifier has been set or training fails
 */
@Override
public void buildClassifier(Instances p_Instances) throws Exception {
    Instances newInsts = null;
    if (this.m_Classifier == null) {
        throw new IllegalStateException("No base classifier has been set!");
    }

    // Majority-class fallback model.
    this.m_ZeroR = new ZeroR();
    this.m_ZeroR.buildClassifier(p_Instances);

    this.m_ClassAttribute = p_Instances.classAttribute();
    this.getOutputFormat(p_Instances);
    int numClassifiers = p_Instances.numClasses();
    switch (numClassifiers) {
    case 1:
        // Only one class value: nothing to discriminate.
        this.m_Classifiers = null;
        break;
    case 2:
        // Binary problem: a single copy of the base classifier suffices.
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, 1);
        newInsts = new Instances(this.m_OutputFormat, 0);
        for (int i = 0; i < p_Instances.numInstances(); i++) {
            Instance inst = this.filterInstance(p_Instances.instance(i));
            inst.setDataset(newInsts);
            newInsts.add(inst);
        }
        this.m_Classifiers[0].buildClassifier(newInsts);
        break;
    default:
        // One-vs-rest: one classifier per class value.
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, numClassifiers);
        // When an id attribute is configured, record every class value seen
        // per instance id so genInstances can skip conflicting negatives.
        Hashtable<String, ArrayList<Double>> id2Classes = null;
        if (this.m_IndexOfID >= 0) {
            id2Classes = new Hashtable<String, ArrayList<Double>>();
            for (int i = 0; i < p_Instances.numInstances(); i++) {
                Instance inst = p_Instances.instance(i);
                String id = inst.stringValue(this.m_IndexOfID);
                if (!id2Classes.containsKey(id)) {
                    id2Classes.put(id, new ArrayList<Double>());
                }
                id2Classes.get(id).add(inst.classValue());
            }
        }
        for (int classIdx = 0; classIdx < this.m_Classifiers.length; classIdx++) {
            newInsts = this.genInstances(p_Instances, classIdx, id2Classes);
            this.m_Classifiers[classIdx].buildClassifier(newInsts);
        }
    }
}

From source file:sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java

License:Open Source License

/**
 * Generates the one-vs-rest training set for the classifier of class
 * {@code p_ClassIndex}: instances of that class are relabeled 1, all
 * others 0. When instance ids are tracked ({@code p_ID2Classes != null}),
 * an instance whose id is also associated with {@code p_ClassIndex} under
 * another row is skipped instead of being used as a negative example. The
 * id attribute is removed from each generated copy.
 *
 * @param p_Instances
 *            input instances
 * @param p_ClassIndex
 *            class index to build the one-vs-rest set for
 * @param p_ID2Classes
 *            class values seen per instance id, or null if ids are not tracked
 * @return new instances
 */
protected Instances genInstances(Instances p_Instances, double p_ClassIndex,
        Hashtable<String, ArrayList<Double>> p_ID2Classes) {
    Instances newInsts = new Instances(this.m_OutputFormat, 0);
    for (int i = 0; i < p_Instances.numInstances(); i++) {
        Instance inst = p_Instances.instance(i);
        // Copy the instance, preserving sparseness.
        Instance newInst = null;
        if (SparseInstance.class.isInstance(inst)) {
            newInst = new SparseInstance(inst);
        } else {
            newInst = new Instance(inst);
        }
        if (newInst.value(p_Instances.classIndex()) == p_ClassIndex) {
            newInst.setValue(inst.classIndex(), 1);
        } else {
            // Double.valueOf instead of the deprecated `new Double(..)`.
            if (p_ID2Classes == null || !p_ID2Classes.get(inst.stringValue(this.m_IndexOfID))
                    .contains(Double.valueOf(p_ClassIndex))) {
                newInst.setValue(inst.classIndex(), 0);
            } else {
                // Same id also occurs with the positive class: ambiguous, skip.
                continue;
            }
        }
        newInst.deleteAttributeAt(this.m_IndexOfID);
        newInst.setDataset(newInsts);
        newInsts.add(newInst);
    }
    return newInsts;
}

From source file:sg.edu.nus.comp.nlp.ims.io.CWekaLexeltWriter.java

License:Open Source License

/**
 * Converts a lexelt into a dense Weka {@code Instances} object. One
 * attribute is created per statistic key (numeric features get a numeric
 * attribute, others a nominal one), preceded by a string "#ID" attribute
 * at index 0 and followed by a nominal "#TAG" attribute at the last index.
 * Each lexelt instance yields one Weka row per tag (tag "?" becomes a
 * missing value); untagged instances yield a single row with a missing tag.
 *
 * @param p_Lexelt the lexelt to convert
 * @return the generated weka.core.Instances
 * @throws ClassNotFoundException if a feature type name cannot be resolved
 */
@Override
public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException {
    String relation = p_Lexelt.getID();
    FastVector attributes = new FastVector();
    int capacity = p_Lexelt.size();

    IStatistic stat = p_Lexelt.getStatistic();
    // String attribute holding the instance id (attribute index 0).
    Attribute ids = new Attribute("#ID");
    attributes.addElement(ids);
    int keySize = stat.getKeys().size();
    for (int keyIdx = 0; keyIdx < keySize; keyIdx++) {
        String key = stat.getKey(keyIdx);
        String type = stat.getType(keyIdx);
        if (ANumericFeature.class.isAssignableFrom(Class.forName(type))) {
            attributes.addElement(new Attribute(key));
        } else {
            // Nominal attribute listing the values recorded in the statistic.
            FastVector attributeValues = new FastVector();
            List<String> values = stat.getValue(keyIdx);
            for (String value : values) {
                attributeValues.addElement(value);
            }
            if (attributeValues.size() == 0) {
                throw new IllegalStateException("No attribute specified.");
            }
            attributes.addElement(new Attribute(key, attributeValues));
        }
    }
    // Tag (class) attribute goes last, at index keySize + 1.
    FastVector attributeValues = new FastVector();
    for (String tag : stat.getTags()) {
        attributeValues.addElement(tag);
    }
    attributes.addElement(new Attribute("#TAG", attributeValues));

    Instances instances = new Instances(relation, attributes, capacity);
    for (int instIdx = 0; instIdx < p_Lexelt.size(); instIdx++) {
        IInstance instance = p_Lexelt.getInstance(instIdx);
        int keyIdx = 0;
        double value;
        IFeature feature;

        // Map attribute index (keyIdx + 1, shifted past #ID) -> value for
        // the features this instance actually has.
        int featureSize = instance.size();
        Hashtable<Integer, Double> features = new Hashtable<Integer, Double>();
        ArrayList<Integer> exist = new ArrayList<Integer>();
        for (int featIdx = 0; featIdx < featureSize; featIdx++) {
            feature = instance.getFeature(featIdx);
            keyIdx = stat.getIndex(feature.getKey());
            if (keyIdx < 0) {
                continue; // feature not tracked by the statistic: ignore
            }
            if (ANumericFeature.class.isInstance(feature)) {
                value = Double.parseDouble(feature.getValue());
            } else if (ABinaryFeature.class.isInstance(feature)) {
                value = instances.attribute(keyIdx + 1).indexOfValue(feature.getValue());
            } else {
                // Unknown nominal values fall back to the statistic default.
                String fv = feature.getValue();
                if (fv == null || !stat.contains(keyIdx, fv)) {
                    fv = stat.getDefaultValue();
                }
                value = instances.attribute(keyIdx + 1).indexOfValue(fv);
            }
            features.put(keyIdx + 1, value);
            exist.add(keyIdx + 1);
        }
        // Sentinel for the tag attribute so the fill loop below also covers
        // every attribute index between the last feature and keySize.
        exist.add(keySize + 1);
        Collections.sort(exist);

        double[] attValues = new double[keySize + 2];
        ids.addStringValue(instance.getID());
        attValues[0] = ids.indexOfValue(instance.getID());
        // Walk the sorted present indices; attributes the instance lacks are
        // filled with 0 (numeric) or the index of nominal value "0".
        int begin, end = -1;
        for (int valueIdx = 0; valueIdx < exist.size(); valueIdx++) {
            begin = end + 1;
            end = exist.get(valueIdx);
            for (int i = begin; i < end; i++) {
                if (instances.attribute(i).isNumeric()) {
                    attValues[i] = 0;
                } else {
                    attValues[i] = instances.attribute(i).indexOfValue("0");
                }
            }
            if (end <= keySize) {
                attValues[end] = features.get(end);
            }
        }

        // Emit one row per tag; '?' marks an unknown class value.
        for (String tag : instance.getTag()) {
            if (tag.equals("'?'") || tag.equals("?")) {
                attValues[keySize + 1] = Instance.missingValue();
            } else {
                attValues[keySize + 1] = instances.attribute(keySize + 1).indexOfValue(tag);
            }
            Instance ins = new Instance(1, attValues);
            instances.add(ins);
        }
        // Untagged instance: single row with a missing class value.
        if (instance.getTag().size() == 0) {
            attValues[keySize + 1] = Instance.missingValue();
            Instance ins = new Instance(1, attValues);
            instances.add(ins);
        }
    }
    return instances;
}

From source file:sg.edu.nus.comp.nlp.ims.io.CWekaSparseLexeltWriter.java

License:Open Source License

/**
 * Converts a lexelt into a sparse Weka {@code Instances} object. The
 * attribute layout matches the dense writer: a string "#ID" attribute at
 * index 0, one attribute per statistic key, and a nominal "#TAG" attribute
 * at the last index — but each row only stores the attribute indices the
 * instance actually has (SparseInstance). Each lexelt instance yields one
 * row per tag (tag "?" becomes a missing value); untagged instances yield
 * a single row with a missing tag.
 *
 * @param p_Lexelt the lexelt to convert
 * @return the generated weka.core.Instances
 * @throws ClassNotFoundException if a feature type name cannot be resolved
 */
@Override
public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException {
    String relation = p_Lexelt.getID();
    FastVector attributes = new FastVector();
    int capacity = p_Lexelt.size();

    IStatistic stat = p_Lexelt.getStatistic();
    // String attribute holding the instance id (attribute index 0).
    Attribute ids = new Attribute("#ID");
    attributes.addElement(ids);
    int keySize = stat.getKeys().size();
    for (int keyIdx = 0; keyIdx < keySize; keyIdx++) {
        String key = stat.getKey(keyIdx);
        String type = stat.getType(keyIdx);
        if (ANumericFeature.class.isAssignableFrom(Class.forName(type))) {
            attributes.addElement(new Attribute(key));
        } else {
            // Nominal attribute listing the values recorded in the statistic.
            FastVector attributeValues = new FastVector();
            List<String> values = stat.getValue(keyIdx);
            for (String value : values) {
                attributeValues.addElement(value);
            }
            if (attributeValues.size() == 0) {
                throw new IllegalStateException("No attribute specified.");
            }
            attributes.addElement(new Attribute(key, attributeValues));
        }
    }
    // Tag (class) attribute goes last, at index keySize + 1.
    FastVector attributeValues = new FastVector();
    for (String tag : stat.getTags()) {
        attributeValues.addElement(tag);
    }
    attributes.addElement(new Attribute("#TAG", attributeValues));

    Instances instances = new Instances(relation, attributes, capacity);
    for (int instIdx = 0; instIdx < p_Lexelt.size(); instIdx++) {
        IInstance instance = p_Lexelt.getInstance(instIdx);
        int keyIdx = 0;
        double value;
        IFeature feature;

        // Map attribute index (keyIdx + 1, shifted past #ID) -> value for
        // the features this instance actually has.
        int featureSize = instance.size();
        Hashtable<Integer, Double> features = new Hashtable<Integer, Double>();
        ArrayList<Integer> exist = new ArrayList<Integer>();
        for (int featIdx = 0; featIdx < featureSize; featIdx++) {
            feature = instance.getFeature(featIdx);
            keyIdx = stat.getIndex(feature.getKey());
            if (keyIdx < 0) {
                continue; // feature not tracked by the statistic: ignore
            }
            if (ANumericFeature.class.isInstance(feature)) {
                value = Double.parseDouble(feature.getValue());
            } else if (ABinaryFeature.class.isInstance(feature)) {
                value = instances.attribute(keyIdx + 1).indexOfValue(feature.getValue());
            } else {
                // Unknown nominal values fall back to the statistic default.
                String fv = feature.getValue();
                if (fv == null || !stat.contains(keyIdx, fv)) {
                    fv = stat.getDefaultValue();
                }
                value = instances.attribute(keyIdx + 1).indexOfValue(fv);
            }
            features.put(keyIdx + 1, value);
            exist.add(keyIdx + 1);
        }
        Collections.sort(exist);

        // Sparse layout: slot 0 is #ID, slots 1..exist.size() are the present
        // features in ascending attribute order, the last slot is the tag.
        double[] attrValues = new double[exist.size() + 2];
        int[] indices = new int[exist.size() + 2];
        ids.addStringValue(instance.getID());
        attrValues[0] = ids.indexOfValue(instance.getID());
        indices[0] = 0;
        for (int valueIdx = 0; valueIdx < exist.size(); valueIdx++) {
            indices[valueIdx + 1] = exist.get(valueIdx);
            attrValues[valueIdx + 1] = features.get(indices[valueIdx + 1]);
        }
        Attribute tags = instances.attribute(keySize + 1);
        indices[exist.size() + 1] = keySize + 1;
        // Emit one row per tag; '?' marks an unknown class value.
        for (String tag : instance.getTag()) {
            if (tag.equals("'?'") || tag.equals("?")) {
                attrValues[exist.size() + 1] = Instance.missingValue();
            } else {
                attrValues[exist.size() + 1] = tags.indexOfValue(tag);
            }
            SparseInstance ins = new SparseInstance(1, attrValues, indices, keySize + 2);
            instances.add(ins);
        }
        // Untagged instance: single row with a missing class value.
        if (instance.getTag().size() == 0) {
            attrValues[exist.size() + 1] = Instance.missingValue();
            SparseInstance ins = new SparseInstance(1, attrValues, indices, keySize + 2);
            instances.add(ins);
        }
    }
    return instances;
}