Example usage for weka.core Instance setValue

List of usage examples for weka.core Instance setValue

Introduction

On this page you can find example usages of weka.core.Instance.setValue.

Prototype

public void setValue(Attribute att, String value);

Source Link

Document

Sets the value of a nominal or string attribute to the given value.

Usage

From source file:fk.stardust.localizer.machinelearn.WekaFaultLocalizer.java

License:Open Source License

/**
 * Ranks the nodes of the given spectra with a Weka classifier trained on its traces.
 * <p>
 * Every trace becomes one training instance holding a "t"/"f" involvement flag per node plus a
 * "t"/"f" class value for the trace's success. After training, each node is probed with an
 * instance in which only that node is involved, and the second entry of the predicted class
 * distribution (the entry for "f", the second declared class value) is used as its rank value.
 *
 * @param spectra the spectra providing the nodes and traces
 * @return the ranking of all nodes of the spectra
 * @throws RuntimeException wrapping any exception raised while building or querying the classifier
 */
@Override
public Ranking<T> localize(final ISpectra<T> spectra) {

    final List<INode<T>> nodes = new ArrayList<>(spectra.getNodes());

    // Nominal domain shared by all node attributes and by the class attribute.
    final List<String> booleanLabels = new ArrayList<>();
    booleanLabels.add("t");
    booleanLabels.add("f");

    // One nominal attribute per node; the map gives direct access by node later on.
    final Map<INode<T>, Attribute> attributeByNode = new HashMap<>();
    final ArrayList<Attribute> attributes = new ArrayList<>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> node : nodes) {
        final Attribute nodeAttribute = new Attribute(node.toString(), booleanLabels);
        attributes.add(nodeAttribute);
        attributeByNode.put(node, nodeAttribute);
    }

    // The class attribute (trace success) goes last.
    final Attribute successAttribute = new Attribute("success", booleanLabels);
    attributes.add(successAttribute);

    final Instances trainingSet = new Instances("TraceInfoInstances", attributes, 1);
    trainingSet.setClassIndex(attributes.size() - 1);

    // Turn every trace into one training instance.
    for (final ITrace<T> trace : spectra.getTraces()) {
        final Instance traceInstance = new DenseInstance(nodes.size() + 1);
        traceInstance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            final String involved = trace.isInvolved(node) ? "t" : "f";
            traceInstance.setValue(attributeByNode.get(node), involved);
        }
        final String successful = trace.isSuccessful() ? "t" : "f";
        traceInstance.setValue(successAttribute, successful);
        trainingSet.add(traceInstance);
    }

    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions,
                trainingSet);
        final Ranking<T> ranking = new Ranking<>();

        System.out.println("begin classifying");
        int classified = 0;

        // A single probe instance is reused for all nodes; it starts with nothing involved.
        final Instance probe = new DenseInstance(nodes.size() + 1);
        probe.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            probe.setValue(attributeByNode.get(node), "f");
        }
        probe.setValue(successAttribute, "f");

        for (final INode<T> node : nodes) {
            if (++classified % 1000 == 0) {
                System.out.println(String.format("Classified %d nodes.", classified));
            }

            final Attribute nodeAttribute = attributeByNode.get(node);

            // Involve only the current node, predict, then switch it off again.
            probe.setValue(nodeAttribute, "t");
            final double[] distribution = classifier.distributionForInstance(probe);
            ranking.rank(node, distribution[1]);
            probe.setValue(nodeAttribute, "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}

From source file:form.ml.ClassifierTemplate.java

/**
 * Wraps a piece of text in a two-attribute Weka instance attached to the training set.
 *
 * @param text the text to store in the instance's "text" attribute
 * @return the newly created instance, with {@code train} as its dataset
 */
private Instance makeInstance(String text) {
    final Instance result = new Instance(2);
    final Attribute textAttribute = train.attribute("text");

    // Register the text with the attribute and store the index it returns.
    final int valueIndex = textAttribute.addStringValue(text);
    result.setValue(textAttribute, valueIndex);

    result.setDataset(train);
    return result;
}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

/**
 * Sets the value at attribute index {@code i} of {@code ii} to {@code s}.
 * An IllegalArgumentException from the instance is reported on stderr instead of propagating.
 */
private void setval(Instance ii, int i, String s) {
    try {
        ii.setValue(i, s);
    } catch (IllegalArgumentException e) {
        final String report = String.format("setval failed (ii: %s, i: %d, s: %s), %s", ii, i, s, e);
        System.err.println(report);
    }
}

From source file:gnusmail.filters.mail_metadata.MultilabelFolder.java

License:Open Source License

/**
 * Flags, for every attribute in {@code attList}, whether its name appears among the folders
 * extracted from the message's folder string ("1") or not ("0").
 * Documents that are not mail messages leave the instance untouched.
 *
 * @param inst the instance to update
 * @param doc the document to read the folder information from
 */
@Override
public void updateInstance(Instance inst, Document doc) {
    if (!(doc instanceof MailMessage)) {
        return;
    }

    final MessageInfo info = new MessageInfo(((MailMessage) doc).getMessage());
    final List<String> folders = extractFoldersFromHierarchy(info.getFolderAsString());

    for (Attribute att : attList) {
        inst.setValue(att, folders.contains(att.name()) ? "1" : "0");
    }
}

From source file:gnusmail.filters.SingleAttFilter.java

License:Open Source License

/**
 * Stores the document's single value in this filter's attribute on the given instance.
 * <p>
 * The attribute is marked missing when the value cannot be read from the message or when
 * the value is not one of the attribute's known values.
 *
 * @param inst the instance to update
 * @param document the document the value is read from
 */
@Override
public void updateInstance(Instance inst, Document document) {
    try {
        int index = attribute.indexOfValue(getSingleValue(document));
        if (index < 0) {
            // indexOfValue signals "unknown value" with -1; storing that as the
            // value index would leave the instance with an invalid entry.
            inst.setMissing(attribute);
        } else {
            inst.setValue(attribute, index);
        }
    } catch (MessagingException e) {
        inst.setMissing(attribute);
    }
}

From source file:gnusmail.filters.SingleNumericAttFilter.java

License:Open Source License

/**
 * Stores the document's single value in this filter's attribute on the given instance.
 * If reading the value throws a MessagingException, the attribute is marked missing instead.
 *
 * @param inst the instance to update
 * @param doc the document the value is read from
 */
@Override
public void updateInstance(Instance inst, Document doc) {
    try {
        inst.setValue(attribute, getSingleValue(doc));
    } catch (MessagingException e) {
        // The value could not be read from the message: record it as missing.
        inst.setMissing(attribute);
    }
}

From source file:gnusmail.filters.WordFrequency.java

License:Open Source License

/**
 * Sets every attribute in {@code attList} to "True" or "False" depending on whether the
 * attribute's name occurs among the document's stemmed tokens.
 * Only stemmed forms longer than two characters are considered.
 *
 * @param inst the instance to update
 * @param doc the document whose tokens are inspected
 */
@Override
public void updateInstance(Instance inst, Document doc) {
    // Distinct stemmed forms of this document, ignoring very short ones.
    final Set<String> stemsInDocument = new HashSet<String>();
    for (Token token : tokenizeDocument(doc)) {
        final String stem = token.getStemmedForm();
        if (stem.length() <= 2) {
            continue;
        }
        stemsInDocument.add(stem);
    }

    // TODO this is slow... improve. Consider adding a getWords method to Document.
    for (Attribute att : attList) {
        final boolean present = stemsInDocument.contains(att.name());
        inst.setValue(att, present ? "True" : "False");
    }
}

From source file:gov.va.chir.tagline.dao.DatasetUtil.java

License:Open Source License

/**
 * Builds a Weka dataset named "train" from the given documents, with one instance per line.
 * <p>
 * The attributes are, in order: the document ID (nominal over all document names), the line
 * ID (numeric), one attribute per collected feature (in sorted feature-name order), and the
 * label as class attribute (nominal over all labels seen). Document-level features are applied
 * to each of the document's lines and take precedence over line features of the same name.
 * A feature for which a line has no value is stored as missing.
 *
 * @param documents the documents whose lines become instances
 * @return the populated dataset, with the last attribute set as class
 */
public static Instances createDataset(final Collection<Document> documents) {

    // Key = feature name | Value = number representing NUMERIC, NOMINAL, etc.
    final Map<String, Integer> featureType = new TreeMap<String, Integer>();

    // Key = feature name | Values = distinct values for NOMINAL values
    final Map<String, Set<String>> nominalFeatureMap = new HashMap<String, Set<String>>();

    final Set<String> labels = new TreeSet<String>();
    final Set<String> docIds = new TreeSet<String>();

    // First scan -- determine attribute values
    for (Document document : documents) {
        processFeatures(document.getFeatures(), featureType, nominalFeatureMap);
        docIds.add(document.getName());

        for (Line line : document.getLines()) {
            processFeatures(line.getFeatures(), featureType, nominalFeatureMap);

            labels.add(line.getLabel());
        }
    }

    final ArrayList<Attribute> attributes = new ArrayList<Attribute>();

    // Add Document and Line IDs as first two attributes
    final Attribute docId = new Attribute(DOC_ID, new ArrayList<String>(docIds));
    final Attribute lineId = new Attribute(LINE_ID);

    attributes.add(docId);
    attributes.add(lineId);

    // Build attributes; non-numeric features without collected nominal values are skipped
    for (Map.Entry<String, Integer> entry : featureType.entrySet()) {
        final String feature = entry.getKey();
        final int type = entry.getValue();

        if (type == Attribute.NUMERIC) {
            attributes.add(new Attribute(feature));
        } else if (nominalFeatureMap.containsKey(feature)) {
            attributes.add(new Attribute(feature, new ArrayList<String>(nominalFeatureMap.get(feature))));
        }
    }

    // Add class attribute
    final Attribute classAttr = new Attribute(LABEL, new ArrayList<String>(labels));
    attributes.add(classAttr);

    final Instances instances = new Instances("train", attributes, documents.size());

    // Second scan -- add data
    for (Document document : documents) {
        final Map<String, Object> docFeatures = document.getFeatures();

        for (Line line : document.getLines()) {
            final Instance instance = new DenseInstance(attributes.size());

            // Merge into a fresh map so the map returned by line.getFeatures() is not
            // modified; document features still override line features of the same name.
            final Map<String, Object> lineFeatures = new HashMap<String, Object>(line.getFeatures());
            lineFeatures.putAll(docFeatures);

            instance.setValue(docId, document.getName());
            instance.setValue(lineId, line.getLineId());
            instance.setValue(classAttr, line.getLabel());

            for (Attribute attribute : attributes) {
                if (attribute.equals(docId) || attribute.equals(lineId) || attribute.equals(classAttr)) {
                    continue;
                }

                final Object obj = lineFeatures.get(attribute.name());

                if (obj == null) {
                    // No value for this feature on this line: store it as missing
                    // rather than failing on obj.toString().
                    instance.setMissing(attribute);
                } else if (obj instanceof Double) {
                    instance.setValue(attribute, ((Double) obj).doubleValue());
                } else if (obj instanceof Integer) {
                    instance.setValue(attribute, ((Integer) obj).doubleValue());
                } else {
                    instance.setValue(attribute, obj.toString());
                }
            }

            instances.add(instance);
        }
    }

    // Set last attribute as class
    instances.setClassIndex(attributes.size() - 1);

    return instances;
}

From source file:gov.va.chir.tagline.dao.DatasetUtil.java

License:Open Source License

/**
 * Builds a dataset with the structure of the given header from the given documents, with one
 * instance per line.
 * <p>
 * The header's document-ID attribute is first extended with the names of all passed-in
 * documents. Lines without a label get a missing class value, and a feature for which a line
 * has no value is stored as missing. Document-level features are applied to each of the
 * document's lines and take precedence over line features of the same name.
 *
 * @param header the dataset header describing the attributes; its class attribute must be set
 * @param documents the documents whose lines become instances
 * @return the populated dataset
 * @throws Exception if the Weka filter used to extend the header fails
 */
@SuppressWarnings("unchecked")
public static Instances createDataset(final Instances header, final Collection<Document> documents)
        throws Exception {

    // Update header to include all docIDs from the passed in documents
    // (Weka requires all values for nominal features)
    final Set<String> docIds = new TreeSet<String>();

    for (Document document : documents) {
        docIds.add(document.getName());
    }

    final AddValues avf = new AddValues();
    avf.setLabels(StringUtils.join(docIds, ","));

    // AddValues takes its attribute index as a 1-based string, whereas
    // Attribute.index() is 0-based, hence the + 1.
    // Calling path: AddValues.setInputFormat() -->
    //               SingleIndex.setUpper() -->
    //               SingleIndex.setValue()
    avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1));

    avf.setInputFormat(header);
    final Instances newHeader = Filter.useFilter(header, avf);

    final Instances instances = new Instances(newHeader, documents.size());

    // Map attributes by name; enumerateAttributes() skips the class attribute,
    // so it is added separately afterwards.
    final Map<String, Attribute> attrMap = new HashMap<String, Attribute>();

    final Enumeration<Attribute> en = newHeader.enumerateAttributes();

    while (en.hasMoreElements()) {
        final Attribute attr = en.nextElement();

        attrMap.put(attr.name(), attr);
    }

    attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute());

    final Attribute docId = attrMap.get(DOC_ID);
    final Attribute lineId = attrMap.get(LINE_ID);
    final Attribute classAttr = attrMap.get(LABEL);

    // Add data
    for (Document document : documents) {
        final Map<String, Object> docFeatures = document.getFeatures();

        for (Line line : document.getLines()) {
            final Instance instance = new DenseInstance(attrMap.size());

            // Merge into a fresh map so the map returned by line.getFeatures() is not
            // modified; document features still override line features of the same name.
            final Map<String, Object> lineFeatures = new HashMap<String, Object>(line.getFeatures());
            lineFeatures.putAll(docFeatures);

            instance.setValue(docId, document.getName());
            instance.setValue(lineId, line.getLineId());

            if (line.getLabel() == null) {
                instance.setMissing(classAttr);
            } else {
                instance.setValue(classAttr, line.getLabel());
            }

            for (Attribute attribute : attrMap.values()) {
                if (attribute.equals(docId) || attribute.equals(lineId) || attribute.equals(classAttr)) {
                    continue;
                }

                final Object obj = lineFeatures.get(attribute.name());

                if (obj == null) {
                    // The header may contain features this line does not provide:
                    // store them as missing rather than failing on obj.toString().
                    instance.setMissing(attribute);
                } else if (obj instanceof Double) {
                    instance.setValue(attribute, ((Double) obj).doubleValue());
                } else if (obj instanceof Integer) {
                    instance.setValue(attribute, ((Integer) obj).doubleValue());
                } else {
                    instance.setValue(attribute, obj.toString());
                }
            }

            instances.add(instance);
        }
    }

    // Set last attribute as class
    instances.setClassIndex(attrMap.size() - 1);

    return instances;
}

From source file:gr.demokritos.iit.cpgislanddetection.io.FileCreatorARFF.java

/**
 * Builds the "isCpG" dataset from a list of count vectors, all labelled with one class.
 * <p>
 * Each vector supplies, at positions 0 to 3, the values for the adenine, thymine, cytosine
 * and guanine attributes. The finished dataset is also printed to standard output.
 *
 * @param listVector the vectors to convert, one instance per vector
 * @param nameClass the class value ("yes" or "no") assigned to every instance
 * @return the dataset containing one instance per vector
 * @throws ParseException declared for callers; not thrown by the code in this method
 */
public Instances createARFF(List<Vector<Integer>> listVector, String nameClass) throws ParseException {

    // Four numeric attributes followed by the nominal class attribute
    final String[] baseNames = { "adenine", "thymine", "cytosine", "guanine" };
    FastVector attributes = new FastVector(5);
    for (String baseName : baseNames) {
        attributes.addElement(new Attribute(baseName));
    }

    FastVector classValues = new FastVector(2);
    classValues.addElement("yes");
    classValues.addElement("no");
    attributes.addElement(new Attribute("theClass", classValues));

    // Empty training set whose class is the fifth attribute
    Instances dataset = new Instances("isCpG", attributes, listVector.size() + 7);
    dataset.setClassIndex(4);

    // One instance per input vector
    for (Vector<Integer> counts : listVector) {
        Instance row = new Instance(5);
        for (int column = 0; column < 4; column++) {
            row.setValue((Attribute) attributes.elementAt(column), counts.get(column));
        }
        row.setValue((Attribute) attributes.elementAt(4), nameClass);

        dataset.add(row);
    }

    System.out.println(dataset);
    return dataset;
}