List of usage examples for weka.core Instance setValue
public void setValue(Attribute att, String value);
From source file:fk.stardust.localizer.machinelearn.WekaFaultLocalizer.java
License:Open Source License
@Override public Ranking<T> localize(final ISpectra<T> spectra) { // == 1. Create Weka training instance final List<INode<T>> nodes = new ArrayList<>(spectra.getNodes()); // nominal true/false values final List<String> tf = new ArrayList<String>(); tf.add("t");//from ww w.j av a 2 s . co m tf.add("f"); // create an attribute for each component final Map<INode<T>, Attribute> attributeMap = new HashMap<INode<T>, Attribute>(); final ArrayList<Attribute> attributeList = new ArrayList<Attribute>(); // NOCS: Weka needs ArrayList.. for (final INode<T> node : nodes) { final Attribute attribute = new Attribute(node.toString(), tf); attributeList.add(attribute); attributeMap.put(node, attribute); } // create class attribute (trace success) final Attribute successAttribute = new Attribute("success", tf); attributeList.add(successAttribute); // create weka training instance final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1); trainingSet.setClassIndex(attributeList.size() - 1); // == 2. add traces to training set // add an instance for each trace for (final ITrace<T> trace : spectra.getTraces()) { final Instance instance = new DenseInstance(nodes.size() + 1); instance.setDataset(trainingSet); for (final INode<T> node : nodes) { instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f"); } instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f"); trainingSet.add(instance); } // == 3. 
use prediction to localize faults // build classifier try { final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions, trainingSet); final Ranking<T> ranking = new Ranking<>(); System.out.println("begin classifying"); int classified = 0; final Instance instance = new DenseInstance(nodes.size() + 1); instance.setDataset(trainingSet); for (final INode<T> node : nodes) { instance.setValue(attributeMap.get(node), "f"); } instance.setValue(successAttribute, "f"); for (final INode<T> node : nodes) { classified++; if (classified % 1000 == 0) { System.out.println(String.format("Classified %d nodes.", classified)); } // contain only the current node in the network instance.setValue(attributeMap.get(node), "t"); // predict with which probability this setup leads to a failing network final double[] distribution = classifier.distributionForInstance(instance); ranking.rank(node, distribution[1]); // reset involvment for node instance.setValue(attributeMap.get(node), "f"); } return ranking; } catch (final Exception e) { // NOCS: Weka throws only raw exceptions throw new RuntimeException(e); } }
From source file:form.ml.ClassifierTemplate.java
/**
 * Builds a Weka {@link Instance} from a string.
 *
 * <p>The text is registered as a value of the "text" attribute of the
 * {@code train} dataset, and the returned two-slot instance refers to it.
 *
 * @param text the string to be converted
 * @return the instance, attached to the {@code train} dataset
 */
private Instance makeInstance(String text) {
    Attribute textAttribute = train.attribute("text");
    // addStringValue returns the index under which the text is stored
    int storedIndex = textAttribute.addStringValue(text);

    Instance result = new Instance(2);
    result.setValue(textAttribute, storedIndex);
    result.setDataset(train);
    return result;
}
From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java
License:Open Source License
/**
 * Sets attribute {@code i} of instance {@code ii} to the value {@code s}.
 * An {@link IllegalArgumentException} raised by the assignment is not
 * propagated; it is reported on standard error instead.
 *
 * @param ii the instance to modify
 * @param i  the attribute index
 * @param s  the value to assign
 */
private void setval(Instance ii, int i, String s) {
    try {
        ii.setValue(i, s);
    } catch (IllegalArgumentException failure) {
        String report = String.format("setval failed (ii: %s, i: %d, s: %s), %s", ii, i, s, failure);
        System.err.println(report);
    }
}
From source file:gnusmail.filters.mail_metadata.MultilabelFolder.java
License:Open Source License
/**
 * Flags, for every attribute in {@code attList}, whether the mail message's
 * folder hierarchy contains a folder named like the attribute ("1") or not
 * ("0"). Documents that are not {@code MailMessage}s are left untouched.
 *
 * @param inst the instance to update
 * @param doc  the document the instance is derived from
 */
@Override
public void updateInstance(Instance inst, Document doc) {
    if (!(doc instanceof MailMessage)) {
        return;
    }

    MailMessage mail = (MailMessage) doc;
    MessageInfo info = new MessageInfo(mail.getMessage());
    List<String> folders = extractFoldersFromHierarchy(info.getFolderAsString());

    for (Attribute att : attList) {
        inst.setValue(att, folders.contains(att.name()) ? "1" : "0");
    }
}
From source file:gnusmail.filters.SingleAttFilter.java
License:Open Source License
/**
 * Sets this filter's attribute on the instance to the index of the
 * document's single value.
 *
 * <p>The attribute is marked missing when the value cannot be read from the
 * document, and also when the value is not one of the attribute's declared
 * values. In the latter case {@code indexOfValue} returns -1, which must not
 * be stored as a value index.
 *
 * @param inst     the instance to update
 * @param document the document the value is extracted from
 */
@Override
public void updateInstance(Instance inst, Document document) {
    try {
        int index = attribute.indexOfValue(getSingleValue(document));
        if (index < 0) {
            // unknown value: storing -1 would leave an invalid nominal index
            inst.setMissing(attribute);
        } else {
            inst.setValue(attribute, index);
        }
    } catch (MessagingException e) {
        inst.setMissing(attribute);
    }
}
From source file:gnusmail.filters.SingleNumericAttFilter.java
License:Open Source License
@Override public void updateInstance(Instance inst, Document doc) { try {//from w w w. j ava 2s.c o m inst.setValue(attribute, getSingleValue(doc)); } catch (MessagingException e) { inst.setMissing(attribute); } }
From source file:gnusmail.filters.WordFrequency.java
License:Open Source License
@Override public void updateInstance(Instance inst, Document doc) { Set<String> stringThisDocument = new HashSet<String>(); List<Token> tokens = tokenizeDocument(doc); for (Token token : tokens) { String stemmedForm = token.getStemmedForm(); if (stemmedForm.length() > 2) { stringThisDocument.add(stemmedForm); }//from w w w . j av a2 s.com } for (Attribute att : attList) { if (stringThisDocument.contains(att.name())) { //TODO esto es lento...mejorar. Pondria un getWords en Documnet inst.setValue(att, "True"); } else { inst.setValue(att, "False"); } } }
From source file:gov.va.chir.tagline.dao.DatasetUtil.java
License:Open Source License
public static Instances createDataset(final Collection<Document> documents) { // Key = feature name | Value = number representing NUMERIC, NOMINAL, etc. final Map<String, Integer> featureType = new TreeMap<String, Integer>(); // Key = feature name | Values = distinct values for NOMINAL values final Map<String, Set<String>> nominalFeatureMap = new HashMap<String, Set<String>>(); final Set<String> labels = new TreeSet<String>(); final Set<String> docIds = new TreeSet<String>(); // First scan -- determine attribute values for (Document document : documents) { processFeatures(document.getFeatures(), featureType, nominalFeatureMap); docIds.add(document.getName());/*from ww w. j a v a 2 s . c o m*/ for (Line line : document.getLines()) { processFeatures(line.getFeatures(), featureType, nominalFeatureMap); labels.add(line.getLabel()); } } final ArrayList<Attribute> attributes = new ArrayList<Attribute>(); // Add Document and Line IDs as first two attributes //final Attribute docId = new Attribute(DOC_ID, (ArrayList<String>) null); final Attribute docId = new Attribute(DOC_ID, new ArrayList<String>(docIds)); final Attribute lineId = new Attribute(LINE_ID); attributes.add(docId); attributes.add(lineId); // Build attributes for (String feature : featureType.keySet()) { final int type = featureType.get(feature); if (type == Attribute.NUMERIC) { attributes.add(new Attribute(feature)); } else { if (nominalFeatureMap.containsKey(feature)) { attributes.add(new Attribute(feature, new ArrayList<String>(nominalFeatureMap.get(feature)))); } } } // Add class attribute Attribute classAttr = new Attribute(LABEL, new ArrayList<String>(labels)); attributes.add(classAttr); final Instances instances = new Instances("train", attributes, documents.size()); // Second scan -- add data for (Document document : documents) { final Map<String, Object> docFeatures = document.getFeatures(); for (Line line : document.getLines()) { final Instance instance = new DenseInstance(attributes.size()); final 
Map<String, Object> lineFeatures = line.getFeatures(); lineFeatures.putAll(docFeatures); instance.setValue(docId, document.getName()); instance.setValue(lineId, line.getLineId()); instance.setValue(classAttr, line.getLabel()); for (Attribute attribute : attributes) { if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) { final String name = attribute.name(); final Object obj = lineFeatures.get(name); if (obj instanceof Double) { instance.setValue(attribute, ((Double) obj).doubleValue()); } else if (obj instanceof Integer) { instance.setValue(attribute, ((Integer) obj).doubleValue()); } else { instance.setValue(attribute, obj.toString()); } } } instances.add(instance); } } // Set last attribute as class instances.setClassIndex(attributes.size() - 1); return instances; }
From source file:gov.va.chir.tagline.dao.DatasetUtil.java
License:Open Source License
@SuppressWarnings("unchecked") public static Instances createDataset(final Instances header, final Collection<Document> documents) throws Exception { // Update header to include all docIDs from the passed in documents // (Weka requires all values for nominal features) final Set<String> docIds = new TreeSet<String>(); for (Document document : documents) { docIds.add(document.getName());/*from w ww . j a va2 s . c om*/ } final AddValues avf = new AddValues(); avf.setLabels(StringUtils.join(docIds, ",")); // Have to add 1 because SingleIndex.setValue() has a bug, expecting // the passed in index to be 1-based rather than 0-based. Why? I have // no idea. // Calling path: AddValues.setInputFormat() --> // SingleIndex.setUpper() --> // SingleIndex.setValue() avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1)); avf.setInputFormat(header); final Instances newHeader = Filter.useFilter(header, avf); final Instances instances = new Instances(newHeader, documents.size()); // Map attributes final Map<String, Attribute> attrMap = new HashMap<String, Attribute>(); final Enumeration<Attribute> en = newHeader.enumerateAttributes(); while (en.hasMoreElements()) { final Attribute attr = en.nextElement(); attrMap.put(attr.name(), attr); } attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute()); final Attribute docId = attrMap.get(DOC_ID); final Attribute lineId = attrMap.get(LINE_ID); final Attribute classAttr = attrMap.get(LABEL); // Add data for (Document document : documents) { final Map<String, Object> docFeatures = document.getFeatures(); for (Line line : document.getLines()) { final Instance instance = new DenseInstance(attrMap.size()); final Map<String, Object> lineFeatures = line.getFeatures(); lineFeatures.putAll(docFeatures); instance.setValue(docId, document.getName()); instance.setValue(lineId, line.getLineId()); if (line.getLabel() == null) { instance.setMissing(classAttr); } else { instance.setValue(classAttr, line.getLabel()); 
} for (Attribute attribute : attrMap.values()) { if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) { final String name = attribute.name(); final Object obj = lineFeatures.get(name); if (obj instanceof Double) { instance.setValue(attribute, ((Double) obj).doubleValue()); } else if (obj instanceof Integer) { instance.setValue(attribute, ((Integer) obj).doubleValue()); } else { instance.setValue(attribute, obj.toString()); } } } instances.add(instance); } } // Set last attribute as class instances.setClassIndex(attrMap.size() - 1); return instances; }
From source file:gr.demokritos.iit.cpgislanddetection.io.FileCreatorARFF.java
public Instances createARFF(List<Vector<Integer>> listVector, String nameClass) throws ParseException { // Declare four numeric attributes Attribute Attribute1 = new Attribute("adenine"); Attribute Attribute2 = new Attribute("thymine"); Attribute Attribute3 = new Attribute("cytosine"); Attribute Attribute4 = new Attribute("guanine"); // Declare the class attribute along with its values FastVector fvClassVal = new FastVector(2); fvClassVal.addElement("yes"); fvClassVal.addElement("no"); Attribute ClassAttribute = new Attribute("theClass", fvClassVal); // Declare the feature vector FastVector fvWekaAttributes = new FastVector(5); fvWekaAttributes.addElement(Attribute1); fvWekaAttributes.addElement(Attribute2); fvWekaAttributes.addElement(Attribute3); fvWekaAttributes.addElement(Attribute4); fvWekaAttributes.addElement(ClassAttribute); // Create an empty training set int capacity = listVector.size() + 7; Instances isTrainingSet = new Instances("isCpG", fvWekaAttributes, capacity); // Set class index isTrainingSet.setClassIndex(4);// w ww.j av a2s. c om // Create the instances from the file with vectors for (int i = 0; i < listVector.size(); i++) { Instance instance = new Instance(5); instance.setValue((Attribute) fvWekaAttributes.elementAt(0), listVector.get(i).get(0)); instance.setValue((Attribute) fvWekaAttributes.elementAt(1), listVector.get(i).get(1)); instance.setValue((Attribute) fvWekaAttributes.elementAt(2), listVector.get(i).get(2)); instance.setValue((Attribute) fvWekaAttributes.elementAt(3), listVector.get(i).get(3)); instance.setValue((Attribute) fvWekaAttributes.elementAt(4), nameClass); //add the instance in training set isTrainingSet.add(instance); } System.out.println(isTrainingSet); return isTrainingSet; }