List of usage examples for weka.core Instance setClassMissing
/**
 * Marks the class value of this instance as missing.
 *
 * NOTE(review): only the signature is visible here; the description is inferred from the
 * method name and from callers that invoke it right before classifying an instance whose
 * label is unknown. Confirm against the Weka Javadoc.
 */
public void setClassMissing();
From source file:core.me.Context.java
License:Open Source License
public double[] getVirtualRelationship(Context p) throws Exception { Classifiers cc = Classifiers.get();// w w w . j av a 2 s.c om Instances dataStruc = cc.getDataStructRC(); double H = 0, D = 0, DX = 0.; int parentClass = 1; H = this.getH(p); D = this.getD(p); DX = this.getDX(p); parentClass = p.getSymbolClass(); double[] values = new double[6]; values[0] = H; values[1] = D; values[2] = DX; values[3] = dataStruc.attribute(3).indexOfValue("" + this.theClass.get()); values[4] = dataStruc.attribute(4).indexOfValue("" + parentClass); values[5] = dataStruc.attribute(5).indexOfValue("0"); Instance inst = new Instance(1.0, values); inst.setDataset(dataStruc); inst.setClassMissing(); return cc.getVirtualRelationship(inst); }
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/** * Evaluates the classifier on a single instance and records the * prediction (if the class is nominal). * * @param classifier machine learning classifier * @param instance the test instance to be classified * @return the prediction made by the clasifier * @throws Exception if model could not be evaluated * successfully or the data contains string attributes *///from www . j a v a 2s. c o m public double evaluateModelOnceAndRecordPrediction(Classifier classifier, Instance instance) throws Exception { Instance classMissing = (Instance) instance.copy(); double pred = 0; classMissing.setDataset(instance.dataset()); classMissing.setClassMissing(); if (m_ClassIsNominal) { if (m_Predictions == null) { m_Predictions = new FastVector(); } double[] dist = classifier.distributionForInstance(classMissing); pred = Utils.maxIndex(dist); if (dist[(int) pred] <= 0) { pred = Instance.missingValue(); } updateStatsForClassifier(dist, instance); m_Predictions.addElement(new NominalPrediction(instance.classValue(), dist, instance.weight())); } else { pred = classifier.classifyInstance(classMissing); updateStatsForPredictor(pred, instance); } return pred; }
From source file:cyber009.udal.functions.StatisticalAnalysis.java
/** * // w w w. j a v a 2s. c om * @param classifier * @param trainingDataSet * @param unLabelDataSets * @param unLabelSet * @param classTarget * @return */ public double conditionalEntropy(Classifier classifier, Instances trainingDataSet, Instances unLabelDataSets, Instance unLabelSet, double classTarget) { double cEnt = 0.0D; double entropy = 0.0D; unLabelSet.setClassValue(classTarget); trainingDataSet.add(trainingDataSet.numInstances(), unLabelSet); AttributeStats classStats = trainingDataSet.attributeStats(trainingDataSet.classIndex()); for (Instance set : unLabelDataSets) { if (instanceCMPWithoutClass(set, unLabelSet) == true) continue; for (int i = 0; i < classStats.nominalCounts.length; i++) { double target = new Double(trainingDataSet.attribute(trainingDataSet.classIndex()).value(i)); set.setClassValue(target); entropy = posteriorDistribution(classifier, trainingDataSet, set, classTarget); //System.out.println("entropy:"+entropy); cEnt += -(entropy) * Math.log10(entropy); set.setClassMissing(); } } trainingDataSet.remove(trainingDataSet.numInstances() - 1); return cEnt; }
From source file:de.uni_koeln.phil_fak.iv.tm.p4.classification.WekaAdapter.java
License:Open Source License
private Instance instance(Document document, String label) { List<Float> values = document.getVector(corpus).getValues(); /* Die Instanz enthlt alle Merkmale plus die Klasse: */ double[] vals = new double[values.size() + 1]; for (int i = 0; i < values.size(); i++) { vals[i + 1] = values.get(i);// w ww .ja va2 s . co m } Instance instance = new Instance(1, vals); /* * Und muss erfahren, was die Werte bedeuten, was wir fr unser * Trainingsset beschrieben hatten: */ instance.setDataset(trainingSet); /* * Beim Training haben wir Instanzen mit vorhandenem Klassenlabel, bei * der Klassifikation ist die Klasse unbekannt: */ if (label == null) { instance.setClassMissing(); // during classification } else instance.setClassValue(label); // during training return instance; }
From source file:de.uni_koeln.spinfo.classification.zoneAnalysis.classifier.WekaClassifier.java
License:Open Source License
private Instance instance(ClassifyUnit cu, Instances trainingSet) { double[] values = cu.getFeatureVector(); String classID = ((ZoneClassifyUnit) cu).getActualClassID() + ""; Instance instance = new SparseInstance(1, values); /*//from w w w .ja v a 2s . com * Weka muss 'erklrt' bekommen, was die Werte bedeuten - dies ist im Trainingsset beschrieben: */ instance.setDataset(trainingSet); /* * Beim Training geben wir den Instanzen ein Klassenlabel, bei der Klassifikation ist die Klasse unbekannt: */ if (classID == "0") { instance.setClassMissing(); // bei Klassifikation } else instance.setClassValue(classID); // beim Training return instance; }
From source file:edu.brandeis.wisedb.scheduler.training.decisiontree.DTSearcher.java
License:Open Source License
@Override public List<Action> schedule(Set<ModelQuery> toSched) { SingularMachineState start = new SingularMachineState(toSched, qtp, sla); List<Action> toR = new LinkedList<Action>(); applyLoop: while (!start.isGoalState()) { log.fine("Current state: " + start); SortedMap<String, String> features = start.getFeatures(); Instance toClassify = new Instance(attributes.length); toClassify.setDataset(wekaDataSet); for (Attribute a : attributes) { if (a.name().equals("action")) { //toClassify.setValue(a, "N"); continue; }/* ww w . j a v a2 s . c o m*/ try { if (features.get(a.name()).equals("?")) { toClassify.setMissing(a); continue; } try { double d = Double.valueOf(features.get(a.name())); toClassify.setValue(a, d); } catch (NumberFormatException e) { toClassify.setValue(a, features.get(a.name())); } } catch (IllegalArgumentException e) { e.printStackTrace(); log.warning( "Encountered previously unseen attribute value! Might need better training data... making random selection."); log.warning("Value for attribute " + a.name() + " was " + features.get(a.name())); Action rand = getPUAction(start); log.warning("Random action selected: " + rand); toR.add(rand); start.applyAction(rand); continue applyLoop; } } toClassify.setClassMissing(); log.finer("Going to classify: " + toClassify); try { double d = tree.classifyInstance(toClassify); toClassify.setClassValue(d); String action = toClassify.stringValue(toClassify.classIndex()); log.finer("Got action string: " + action); Action selected = null; for (Action a : start.getPossibleActions()) { if (actionMatches(a, action)) { selected = a; break; } } if (selected == null) { //log.warning("Could not find applicable action for string: " + action + " ... 
picking random action"); Action a = getPUAction(start); start.applyAction(a); toR.add(a); continue; } log.fine("Selected action: " + selected); start.applyAction(selected); toR.add(selected); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } } return toR; }
From source file:edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java
License:Open Source License
/** * Creates a WEKA Instance object out of a {@link FeatureVector}. **//*from ww w. ja v a2 s .co m*/ private Instance makeInstance(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, double[] labelValues) { // Make sure attributeInfo has been filled if (attributeInfo.size() == 0) { System.err.println("WekaWrapper: Error - makeInstance was called while attributeInfo " + "was empty."); new Exception().printStackTrace(); System.exit(1); } // Initialize an Instance object Instance inst = new Instance(attributeInfo.size()); // Acknowledge that this instance will be a member of our dataset // 'instances' inst.setDataset(instances); // Assign values for its attributes /* * Since we are iterating through this example's feature list, which does not contain the * label feature (the label feature is the first in the 'attribute' list), we start attIndex * at 1, while we start featureIndex at 0. */ for (int featureIndex = 0, attIndex = 1; featureIndex < exampleFeatures.length; ++featureIndex, ++attIndex) { Feature f = (Feature) lexicon.lookupKey(exampleFeatures[featureIndex]); Attribute att = (Attribute) attributeInfo.elementAt(attIndex); // make sure the feature's identifier and the attribute's name match if (!(att.name().equals(f.getStringIdentifier()))) { System.err.println( "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair."); System.err.println( " " + att.name() + " and " + f.getStringIdentifier() + " should have been identical."); new Exception().printStackTrace(); System.exit(1); } if (!f.isDiscrete()) inst.setValue(attIndex, exampleValues[featureIndex]); else { // it's a discrete or conjunctive feature. String attValue = f.totalValues() == 2 ? att.value((int) exampleValues[featureIndex]) : f.getStringValue(); inst.setValue(attIndex, attValue); } } /* * Here, we assume that if either the labels FeatureVector is empty of features, or is null, * then this example is to be considered unlabeled. 
*/ if (exampleLabels.length == 0) { inst.setClassMissing(); } else if (exampleLabels.length > 1) { System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, "); new Exception().printStackTrace(); System.exit(1); } else { Feature label = labelLexicon.lookupKey(exampleLabels[0]); // make sure the name of the label feature matches the name of the 0'th // attribute if (!(label.getStringIdentifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) { System.err.println("WekaWrapper: Error - makeInstance found the wrong label name."); new Exception().printStackTrace(); System.exit(1); } if (!label.isDiscrete()) inst.setValue(0, labelValues[0]); else inst.setValue(0, label.getStringValue()); } return inst; }
From source file:edu.illinois.cs.cogcomp.saul.learn.SaulWekaWrapper.java
License:Open Source License
/** * Creates a WEKA Instance object out of a {@link FeatureVector}. **//*from w ww .ja v a2s.c om*/ private Instance makeInstance(LBJavaInstance instance) { // Initialize an Instance object Instance inst = new Instance(attributeInfo.size()); // Acknowledge that this instance will be a member of our dataset 'wekaInstances' inst.setDataset(wekaInstances); // set all nominal feature values to 0, which means those features are not used in this example for (int i = 1; i < attributeInfo.size(); i++) if (inst.attribute(i).isNominal()) inst.setValue(i, "0"); // Assign values for its attributes /* * Since we are iterating through this example's feature list, which does not contain the * label feature (the label feature is the first in the 'attribute' list), we set attIndex * to at exampleFeatures[featureIndices] + 1, while we start featureIndices at 0. */ for (int featureIndex = 0; featureIndex < instance.featureIndices.length; ++featureIndex) { int attIndex = instance.featureIndices[featureIndex] + 1; Feature f = lexicon.lookupKey(instance.featureIndices[featureIndex]); // if the feature does not exist, do nothing. this may occur in test set. if (f == null) continue; Attribute att = (Attribute) attributeInfo.elementAt(attIndex); // make sure the feature and the attribute match if (!(att.name().equals(f.toString()))) { System.err.println( "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair."); System.err.println(" " + att.name() + " and " + f.toString() + " should have been identical."); new Exception().printStackTrace(); System.exit(1); } if (f.isDiscrete()) inst.setValue(attIndex, "1"); // this feature is used in this example so we set it to "1" else inst.setValue(attIndex, instance.featureValues[featureIndex]); } /* * Here, we assume that if either the labels FeatureVector is empty of features, or is null, * then this example is to be considered unlabeled. 
*/ if (instance.labelIndices.length == 0) { inst.setClassMissing(); } else if (instance.labelIndices.length > 1) { System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, "); new Exception().printStackTrace(); System.exit(1); } else { Feature label = labelLexicon.lookupKey(instance.labelIndices[0]); // make sure the label feature matches the n 0'th attribute if (!(label.getGeneratingClassifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) { System.err.println("WekaWrapper: Error - makeInstance found the wrong label name."); new Exception().printStackTrace(); System.exit(1); } if (!label.isDiscrete()) inst.setValue(0, instance.labelValues[0]); else inst.setValue(0, label.getStringValue()); } return inst; }
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
/** * Train one vs all models over the given training data. * //from w w w . ja v a 2 s . c om * @param modelpath directory to store each model for the one vs. all method * @param prefix prefix the models should have (each model will have the name of its class appended * @throws Exception */ public HashMap<Integer, HashMap<String, Double>> predictOneVsAll(String modelpath, String prefix) throws Exception { HashMap<Integer, HashMap<String, Double>> rslt = new HashMap<Integer, HashMap<String, Double>>(); if ((testdata == null) || testdata.isEmpty()) { System.err.println("WekaWrapper: testModel() - no test data available, model won't be evaluated"); System.exit(9); } Enumeration<Object> classValues = traindata.classAttribute().enumerateValues(); HashMap<String, Classifier> cls = new HashMap<String, Classifier>(); while (classValues.hasMoreElements()) { String v = (String) classValues.nextElement(); //needed because of weka's sparse data format problems THIS IS TROUBLE! ... if (v.equalsIgnoreCase("dummy")) { continue; } try { Classifier cl = loadModel(modelpath + File.separator + prefix + "_" + v + ".model"); cls.put(v, cl); } catch (Exception e) { System.err.println("classifier for class " + v + " could not be loaded, prediction aborted"); System.exit(9); } } for (int i = 0; i < testdata.numInstances(); i++) { HashMap<String, Double> clResults = new HashMap<String, Double>(); Instance inst = testdata.instance(i); int instId = (int) inst.value(testdata.attribute("instanceId").index()); inst.setClassMissing(); for (String currentClass : cls.keySet()) { double[] dist = cls.get(currentClass).distributionForInstance(inst); String[] classes = { "dummy", currentClass, "UNKNOWN" }; System.out.print("instance " + instId + " (" + currentClass + ") --> "); for (int c = 0; c < dist.length; c++) { System.out.print("\t cl_" + c + " (" + classes[c] + ") = " + dist[c] + "; "); } System.out.print("\n"); //first class is always the class to identify, if unknown class has better score 
store -1 for the class clResults.put(currentClass, dist[1]); } rslt.put(instId, clResults); } return rslt; }
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
/** * Train one vs all models over the given training data. * // w ww . j a v a 2 s .c om * @param modelpath directory to store each model for the one vs. all method * @param prefix prefix the models should have (each model will have the name of its class appended * @throws Exception */ public HashMap<Integer, HashMap<String, Double>> addOneVsAllPredictions(String modelpath, String prefix, double thres) throws Exception { HashMap<Integer, HashMap<String, Double>> rslt = new HashMap<Integer, HashMap<String, Double>>(); if ((testdata == null) || testdata.isEmpty()) { System.err.println("WekaWrapper: testModel() - no test data available, model won't be evaluated"); System.exit(9); } Enumeration<Object> classValues = traindata.classAttribute().enumerateValues(); HashMap<String, Classifier> cls = new HashMap<String, Classifier>(); while (classValues.hasMoreElements()) { String v = (String) classValues.nextElement(); //needed because of weka's sparse data format problems THIS IS TROUBLE! ... if (v.equalsIgnoreCase("dummy")) { continue; } try { Classifier cl = loadModel(modelpath + File.separator + prefix + "_" + v + ".model"); cls.put(v, cl); } catch (Exception e) { System.err.println("classifier for class " + v + " could not be loaded, prediction aborted"); System.exit(9); } } for (int i = 0; i < testdata.numInstances(); i++) { HashMap<String, Double> clResults = new HashMap<String, Double>(); Instance inst = testdata.instance(i); int instId = (int) inst.value(testdata.attribute("instanceId").index()); inst.setClassMissing(); for (String currentClass : cls.keySet()) { double[] dist = cls.get(currentClass).distributionForInstance(inst); System.out.print("instance " + instId + " (" + currentClass + ") --> \n"); /* for (int c=0; c<dist.length; c++) { System.out.print("\t cl_"+c+" ("+") = "+dist[c]+"; "); } System.out.print("\n"); */ //first class is always the class to identify, if unknown class has better score store -1 for the class clResults.put(currentClass, dist[1]); 
} rslt.put(instId, clResults); } return rslt; }