Example usage for weka.core Instance setClassMissing

List of usage examples for weka.core Instance setClassMissing

Introduction

On this page you can find example usages of weka.core.Instance.setClassMissing().

Prototype

public void setClassMissing();

Source Link

Document

Sets the class value of an instance to be "missing".

Usage

From source file:core.me.Context.java

License:Open Source License

/**
 * Builds a six-attribute Weka instance relating this context to {@code p} and asks the
 * shared {@code Classifiers} object to score it.
 *
 * <p>Attributes 0-2 carry the results of {@code getH(p)}, {@code getD(p)} and
 * {@code getDX(p)}. Attributes 3-5 carry the nominal indices of this context's class,
 * of the symbol class of {@code p}, and of the literal value {@code "0"}. The class
 * value is marked missing before the instance is handed over.
 *
 * @param p the other context
 * @return the result of {@code Classifiers.getVirtualRelationship} for the built instance
 * @throws Exception if any of the called helpers or the classifier lookup fails
 */
public double[] getVirtualRelationship(Context p) throws Exception {

    Classifiers classifiers = Classifiers.get();
    Instances header = classifiers.getDataStructRC();

    double h = this.getH(p);
    double d = this.getD(p);
    double dx = this.getDX(p);
    int parentClass = p.getSymbolClass();

    // Nominal attributes are stored as the index of their label in the header.
    // NOTE(review): presumably attribute 5 is the class attribute and "0" is only a
    // placeholder that setClassMissing() overrides - confirm against the header.
    double[] values = {
            h,
            d,
            dx,
            header.attribute(3).indexOfValue("" + this.theClass.get()),
            header.attribute(4).indexOfValue("" + parentClass),
            header.attribute(5).indexOfValue("0") };

    Instance query = new Instance(1.0, values);
    query.setDataset(header);
    query.setClassMissing();

    return classifiers.getVirtualRelationship(query);
}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * Evaluates the classifier on a single instance and, when the class is nominal,
 * records the prediction.
 *
 * <p>The classifier only ever sees a copy of the instance whose class value has been
 * set to missing; the statistics are updated against the original instance.
 *
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @return the prediction made by the classifier; for a nominal class this is the index
 *         of the most probable label, or the missing value if that label's probability
 *         is not positive
 * @throws Exception if the model could not be evaluated successfully or the data
 *         contains string attributes
 */
public double evaluateModelOnceAndRecordPrediction(Classifier classifier, Instance instance) throws Exception {

    // Work on a copy so the caller's instance keeps its true class value.
    Instance unlabeled = (Instance) instance.copy();
    unlabeled.setDataset(instance.dataset());
    unlabeled.setClassMissing();

    if (!m_ClassIsNominal) {
        double numericPrediction = classifier.classifyInstance(unlabeled);
        updateStatsForPredictor(numericPrediction, instance);
        return numericPrediction;
    }

    if (m_Predictions == null) {
        m_Predictions = new FastVector();
    }
    double[] distribution = classifier.distributionForInstance(unlabeled);
    int best = Utils.maxIndex(distribution);
    // A non-positive maximum means the classifier abstained.
    double prediction = (distribution[best] <= 0) ? Instance.missingValue() : best;

    updateStatsForClassifier(distribution, instance);
    m_Predictions.addElement(new NominalPrediction(instance.classValue(), distribution, instance.weight()));
    return prediction;
}

From source file:cyber009.udal.functions.StatisticalAnalysis.java

/**
 * Accumulates an entropy-style score over the unlabeled pool after temporarily adding
 * {@code unLabelSet}, labelled with {@code classTarget}, to the training data.
 *
 * <p>For every pool instance that {@code instanceCMPWithoutClass} does not match
 * against {@code unLabelSet}, and for every label of the class attribute, the value
 * {@code p} returned by {@code posteriorDistribution} contributes
 * {@code -p * log10(p)} to the result. A value of exactly zero contributes nothing
 * (the usual {@code 0 * log 0 = 0} convention) instead of turning the sum into NaN.
 *
 * <p>Side effects visible in this method: the class value of {@code unLabelSet} is set
 * to {@code classTarget} and not restored; each visited pool instance ends with its
 * class value missing; the temporary training instance is removed again even if the
 * computation throws.
 *
 * @param classifier classifier forwarded to {@code posteriorDistribution}
 * @param trainingDataSet labelled data; temporarily extended by one instance
 * @param unLabelDataSets pool of unlabeled instances to iterate over
 * @param unLabelSet the candidate instance being evaluated
 * @param classTarget class value assigned to {@code unLabelSet}
 * @return the accumulated score
 * @throws NumberFormatException if a class label cannot be parsed as a number
 */
public double conditionalEntropy(Classifier classifier, Instances trainingDataSet, Instances unLabelDataSets,
        Instance unLabelSet, double classTarget) {
    double cEnt = 0.0D;
    unLabelSet.setClassValue(classTarget);
    trainingDataSet.add(trainingDataSet.numInstances(), unLabelSet);
    try {
        int classIndex = trainingDataSet.classIndex();
        AttributeStats classStats = trainingDataSet.attributeStats(classIndex);
        int numLabels = classStats.nominalCounts.length;
        for (Instance set : unLabelDataSets) {
            if (instanceCMPWithoutClass(set, unLabelSet)) {
                continue;
            }
            for (int i = 0; i < numLabels; i++) {
                // NOTE(review): assumes the nominal class labels are numeric strings.
                double target = Double.parseDouble(trainingDataSet.attribute(classIndex).value(i));
                set.setClassValue(target);
                double probability = posteriorDistribution(classifier, trainingDataSet, set, classTarget);
                // log10(0) is -Infinity and 0 * -Infinity is NaN, so skip exact zeros.
                if (probability != 0.0D) {
                    cEnt -= probability * Math.log10(probability);
                }
                set.setClassMissing();
            }
        }
    } finally {
        // Always undo the temporary addition so the caller's training set is unchanged.
        trainingDataSet.remove(trainingDataSet.numInstances() - 1);
    }
    return cEnt;
}

From source file:de.uni_koeln.phil_fak.iv.tm.p4.classification.WekaAdapter.java

License:Open Source License

/**
 * Converts a document into a Weka instance bound to {@code trainingSet}.
 *
 * @param document the document whose feature vector (relative to {@code corpus}) is used
 * @param label the class label for training, or {@code null} when classifying
 * @return the instance, with its class either set to {@code label} or marked missing
 */
private Instance instance(Document document, String label) {
    List<Float> features = document.getVector(corpus).getValues();
    /* The instance holds all features plus the class, hence one extra slot. */
    double[] vals = new double[features.size() + 1];
    // Slot 0 is left untouched here; the features start at index 1.
    int slot = 1;
    for (Float feature : features) {
        vals[slot++] = feature;
    }
    Instance result = new Instance(1, vals);
    /*
     * The instance has to learn what its values mean, which is what we described
     * for our training set.
     */
    result.setDataset(trainingSet);
    /*
     * During training the instances come with a class label; during
     * classification the class is unknown.
     */
    if (label != null) {
        result.setClassValue(label); // during training
    } else {
        result.setClassMissing(); // during classification
    }
    return result;
}

From source file:de.uni_koeln.spinfo.classification.zoneAnalysis.classifier.WekaClassifier.java

License:Open Source License

/**
 * Converts a classify unit into a sparse Weka instance bound to {@code trainingSet}.
 *
 * <p>The unit's actual class ID is rendered as a string. The ID {@code "0"} is treated
 * as "no class known" and yields an instance whose class is missing; any other ID is
 * set as the class value.
 *
 * @param cu the unit to convert; must be a {@code ZoneClassifyUnit}
 * @param trainingSet the dataset that describes the meaning of the values
 * @return the instance built from the unit's feature vector
 */
private Instance instance(ClassifyUnit cu, Instances trainingSet) {
    double[] values = cu.getFeatureVector();
    String classID = ((ZoneClassifyUnit) cu).getActualClassID() + "";
    Instance instance = new SparseInstance(1, values);
    /*
     * Weka has to be told what the values mean - this is described by the
     * training set.
     */
    instance.setDataset(trainingSet);
    /*
     * During training the instances get a class label; during classification
     * the class is unknown.
     */
    // Compare by value: classID is built at runtime, so "==" against the literal
    // "0" would compare references and never match.
    if ("0".equals(classID)) {
        instance.setClassMissing(); // during classification
    } else {
        instance.setClassValue(classID); // during training
    }
    return instance;
}

From source file:edu.brandeis.wisedb.scheduler.training.decisiontree.DTSearcher.java

License:Open Source License

/**
 * Schedules the given queries by repeatedly asking the decision tree which action to
 * apply to the current state until the goal state is reached.
 *
 * <p>In each round the state's features are copied into a Weka instance (the
 * {@code "action"} attribute is skipped, {@code "?"} becomes a missing value, numeric
 * strings are set as numbers, everything else as a nominal/string value). If an
 * attribute value is rejected with an {@link IllegalArgumentException}, or the predicted
 * action string matches none of the currently possible actions, the action returned by
 * {@code getPUAction} is applied instead.
 *
 * @param toSched the queries to schedule
 * @return the applied actions in order, or {@code null} if classification throws
 */
@Override
public List<Action> schedule(Set<ModelQuery> toSched) {
    SingularMachineState state = new SingularMachineState(toSched, qtp, sla);
    List<Action> plan = new LinkedList<Action>();

    applyLoop: while (!state.isGoalState()) {
        log.fine("Current state: " + state);

        SortedMap<String, String> features = state.getFeatures();
        Instance toClassify = new Instance(attributes.length);
        toClassify.setDataset(wekaDataSet);

        for (Attribute attr : attributes) {
            // The action is what we want to predict, so it is never filled in here.
            if (attr.name().equals("action")) {
                continue;
            }

            try {
                String raw = features.get(attr.name());
                if (raw.equals("?")) {
                    toClassify.setMissing(attr);
                } else {
                    try {
                        double numeric = Double.valueOf(raw);
                        toClassify.setValue(attr, numeric);
                    } catch (NumberFormatException notNumeric) {
                        // Not a number: treat it as a nominal/string value.
                        toClassify.setValue(attr, raw);
                    }
                }
            } catch (IllegalArgumentException unseenValue) {
                unseenValue.printStackTrace();
                log.warning(
                        "Encountered previously unseen attribute value! Might need better training data... making random selection.");
                log.warning("Value for attribute " + attr.name() + " was " + features.get(attr.name()));
                Action fallback = getPUAction(state);
                log.warning("Random action selected: " + fallback);
                plan.add(fallback);
                state.applyAction(fallback);
                // Abandon this instance and start over from the new state.
                continue applyLoop;
            }
        }

        toClassify.setClassMissing();
        log.finer("Going to classify: " + toClassify);

        try {
            double predicted = tree.classifyInstance(toClassify);
            // Write the prediction back so it can be read as its label string.
            toClassify.setClassValue(predicted);
            String action = toClassify.stringValue(toClassify.classIndex());
            log.finer("Got action string: " + action);

            Action selected = null;
            for (Action candidate : state.getPossibleActions()) {
                if (actionMatches(candidate, action)) {
                    selected = candidate;
                    break;
                }
            }

            if (selected == null) {
                // No possible action matches the prediction: fall back silently.
                Action fallback = getPUAction(state);
                state.applyAction(fallback);
                plan.add(fallback);
            } else {
                log.fine("Selected action: " + selected);

                state.applyAction(selected);

                plan.add(selected);
            }
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            return null;
        }
    }

    return plan;
}

From source file:edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java

License:Open Source License

/**
 * Creates a WEKA {@code Instance} from the array form of an example.
 *
 * <p>Attribute 0 of {@code attributeInfo} is the label; feature {@code k} of the
 * example is written to attribute {@code k + 1}. Any inconsistency between the example
 * and {@code attributeInfo} is reported on {@code System.err} and terminates the JVM
 * with exit status 1.
 *
 * @param exampleFeatures lexicon keys of the example's features
 * @param exampleValues values of the features, parallel to {@code exampleFeatures}
 * @param exampleLabels lexicon keys of the example's labels; {@code null} or empty
 *        means the example is unlabeled, more than one label is an error
 * @param labelValues values of the labels, parallel to {@code exampleLabels}
 * @return the populated instance, attached to {@code instances}
 */
private Instance makeInstance(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
        double[] labelValues) {
    // Make sure attributeInfo has been filled
    if (attributeInfo.size() == 0) {
        System.err.println("WekaWrapper: Error - makeInstance was called while attributeInfo " + "was empty.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    // Initialize an Instance object
    Instance inst = new Instance(attributeInfo.size());

    // Acknowledge that this instance will be a member of our dataset
    // 'instances'
    inst.setDataset(instances);

    // Assign values for its attributes
    /*
     * Since we are iterating through this example's feature list, which does not contain the
     * label feature (the label feature is the first in the 'attribute' list), we start attIndex
     * at 1, while we start featureIndex at 0.
     */
    for (int featureIndex = 0, attIndex = 1; featureIndex < exampleFeatures.length; ++featureIndex, ++attIndex) {
        Feature f = (Feature) lexicon.lookupKey(exampleFeatures[featureIndex]);
        Attribute att = (Attribute) attributeInfo.elementAt(attIndex);

        // make sure the feature's identifier and the attribute's name match
        if (!(att.name().equals(f.getStringIdentifier()))) {
            System.err.println(
                    "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair.");
            System.err.println(
                    "  " + att.name() + " and " + f.getStringIdentifier() + " should have been identical.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!f.isDiscrete()) {
            inst.setValue(attIndex, exampleValues[featureIndex]);
        } else { // it's a discrete or conjunctive feature.
            // Two-valued features store an index into the attribute's value list;
            // all others carry their value as a string.
            String attValue = f.totalValues() == 2 ? att.value((int) exampleValues[featureIndex])
                    : f.getStringValue();
            inst.setValue(attIndex, attValue);
        }
    }

    /*
     * If the label array is null or empty, this example is considered unlabeled. The
     * null check makes the code honour that contract instead of throwing a
     * NullPointerException.
     */
    if (exampleLabels == null || exampleLabels.length == 0) {
        inst.setClassMissing();
    } else if (exampleLabels.length > 1) {
        System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, ");
        new Exception().printStackTrace();
        System.exit(1);
    } else {
        Feature label = labelLexicon.lookupKey(exampleLabels[0]);

        // make sure the name of the label feature matches the name of the 0'th
        // attribute
        if (!(label.getStringIdentifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) {
            System.err.println("WekaWrapper: Error - makeInstance found the wrong label name.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!label.isDiscrete()) {
            inst.setValue(0, labelValues[0]);
        } else {
            inst.setValue(0, label.getStringValue());
        }
    }

    return inst;
}

From source file:edu.illinois.cs.cogcomp.saul.learn.SaulWekaWrapper.java

License:Open Source License

/**
 * Creates a WEKA {@code Instance} from an {@code LBJavaInstance}.
 *
 * <p>Attribute 0 of {@code attributeInfo} is the label; the feature with lexicon index
 * {@code k} is written to attribute {@code k + 1}. Every nominal feature attribute
 * starts out as {@code "0"} and is switched to {@code "1"} when the example contains
 * that discrete feature. Feature indices unknown to the lexicon are skipped.
 * Inconsistencies between the example and {@code attributeInfo} are reported on
 * {@code System.err} and terminate the JVM with exit status 1.
 *
 * @param instance the example to convert; {@code null} or empty {@code labelIndices}
 *        means the example is unlabeled, more than one label is an error
 * @return the populated instance, attached to {@code wekaInstances}
 */
private Instance makeInstance(LBJavaInstance instance) {

    // Initialize an Instance object
    Instance inst = new Instance(attributeInfo.size());

    // Acknowledge that this instance will be a member of our dataset 'wekaInstances'
    inst.setDataset(wekaInstances);

    // set all nominal feature values to 0, which means those features are not used in this example
    for (int i = 1; i < attributeInfo.size(); i++) {
        if (inst.attribute(i).isNominal()) {
            inst.setValue(i, "0");
        }
    }

    // Assign values for its attributes
    /*
     * The example's feature list does not contain the label feature (the label is the first
     * entry of the attribute list), so the attribute index of a feature is its lexicon
     * index plus one.
     */
    for (int featureIndex = 0; featureIndex < instance.featureIndices.length; ++featureIndex) {
        int attIndex = instance.featureIndices[featureIndex] + 1;
        Feature f = lexicon.lookupKey(instance.featureIndices[featureIndex]);

        // if the feature does not exist, do nothing. this may occur in test set.
        if (f == null) {
            continue;
        }
        Attribute att = (Attribute) attributeInfo.elementAt(attIndex);

        // make sure the feature and the attribute match
        if (!(att.name().equals(f.toString()))) {
            System.err.println(
                    "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair.");
            System.err.println("  " + att.name() + " and " + f.toString() + " should have been identical.");
            new Exception().printStackTrace();
            System.exit(1);
        }
        if (f.isDiscrete()) {
            inst.setValue(attIndex, "1"); // this feature is used in this example so we set it to "1"
        } else {
            inst.setValue(attIndex, instance.featureValues[featureIndex]);
        }

    }

    /*
     * If the label index array is null or empty, this example is considered unlabeled. The
     * null check makes the code honour that contract instead of throwing a
     * NullPointerException.
     */
    if (instance.labelIndices == null || instance.labelIndices.length == 0) {
        inst.setClassMissing();
    } else if (instance.labelIndices.length > 1) {
        System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, ");
        new Exception().printStackTrace();
        System.exit(1);
    } else {
        Feature label = labelLexicon.lookupKey(instance.labelIndices[0]);

        // make sure the label feature matches the 0'th attribute
        if (!(label.getGeneratingClassifier().equals(((Attribute) attributeInfo.elementAt(0)).name()))) {
            System.err.println("WekaWrapper: Error - makeInstance found the wrong label name.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (!label.isDiscrete()) {
            inst.setValue(0, instance.labelValues[0]);
        } else {
            inst.setValue(0, label.getStringValue());
        }
    }

    return inst;
}

From source file:elh.eus.absa.WekaWrapper.java

License:Open Source License

/**
 * Applies previously stored one-vs-all models to the test data.
 *
 * <p>One model is loaded for every value of the training data's class attribute except
 * {@code "dummy"}, from {@code modelpath/prefix_<value>.model}. For every test instance
 * each model's class distribution is printed to standard output and the entry at
 * index 1 is stored as that model's score.
 *
 * <p>Side effects: the class value of every test instance is set to missing. The JVM
 * is terminated with exit status 9 when there is no test data or a model cannot be
 * loaded.
 *
 * @param modelpath directory holding the one-vs-all models
 * @param prefix file name prefix of the models (the class name is appended to it)
 * @return map from the instance's {@code instanceId} attribute value to a map from
 *         class name to the score of that class's model
 * @throws Exception if classifying an instance fails
 */
public HashMap<Integer, HashMap<String, Double>> predictOneVsAll(String modelpath, String prefix)
        throws Exception {
    HashMap<Integer, HashMap<String, Double>> predictions = new HashMap<Integer, HashMap<String, Double>>();
    if (testdata == null || testdata.isEmpty()) {
        System.err.println("WekaWrapper: testModel() - no test data available, model won't be evaluated");
        System.exit(9);
    }

    // Load one binary model per real class value.
    Enumeration<Object> labels = traindata.classAttribute().enumerateValues();
    HashMap<String, Classifier> models = new HashMap<String, Classifier>();
    while (labels.hasMoreElements()) {
        String label = (String) labels.nextElement();
        // "dummy" only exists because of weka's sparse data format problems; it has no model.
        if (!label.equalsIgnoreCase("dummy")) {
            try {
                models.put(label, loadModel(modelpath + File.separator + prefix + "_" + label + ".model"));
            } catch (Exception e) {
                System.err.println("classifier for class " + label + " could not be loaded, prediction aborted");
                System.exit(9);
            }
        }
    }

    for (int idx = 0; idx < testdata.numInstances(); idx++) {
        Instance current = testdata.instance(idx);
        int instId = (int) current.value(testdata.attribute("instanceId").index());
        current.setClassMissing();

        HashMap<String, Double> scores = new HashMap<String, Double>();
        for (String currentClass : models.keySet()) {
            double[] dist = models.get(currentClass).distributionForInstance(current);
            // Names used only for the printout below.
            String[] names = { "dummy", currentClass, "UNKNOWN" };
            System.out.print("instance " + instId + " (" + currentClass + ") --> ");
            int c = 0;
            while (c < dist.length) {
                System.out.print("\t cl_" + c + " (" + names[c] + ") = " + dist[c] + "; ");
                c++;
            }
            System.out.print("\n");

            // NOTE(review): presumably index 1 is the target class of each binary
            // model (after "dummy") - confirm against how the models were trained.
            scores.put(currentClass, dist[1]);
        }
        predictions.put(instId, scores);
    }

    return predictions;
}

From source file:elh.eus.absa.WekaWrapper.java

License:Open Source License

/**
 * Applies previously stored one-vs-all models to the test data and collects their scores.
 *
 * <p>One model is loaded for every value of the training data's class attribute except
 * {@code "dummy"}, from {@code modelpath/prefix_<value>.model}. For every test instance
 * the entry at index 1 of each model's class distribution is stored as that model's
 * score, and one progress line per instance/model pair is printed to standard output.
 *
 * <p>Side effects: the class value of every test instance is set to missing. The JVM
 * is terminated with exit status 9 when there is no test data or a model cannot be
 * loaded.
 *
 * @param modelpath directory holding the one-vs-all models
 * @param prefix file name prefix of the models (the class name is appended to it)
 * @param thres currently not used by this method
 * @return map from the instance's {@code instanceId} attribute value to a map from
 *         class name to the score of that class's model
 * @throws Exception if classifying an instance fails
 */
public HashMap<Integer, HashMap<String, Double>> addOneVsAllPredictions(String modelpath, String prefix,
        double thres) throws Exception {
    HashMap<Integer, HashMap<String, Double>> predictions = new HashMap<Integer, HashMap<String, Double>>();
    if (testdata == null || testdata.isEmpty()) {
        System.err.println("WekaWrapper: testModel() - no test data available, model won't be evaluated");
        System.exit(9);
    }

    // Load one binary model per real class value.
    Enumeration<Object> labels = traindata.classAttribute().enumerateValues();
    HashMap<String, Classifier> models = new HashMap<String, Classifier>();
    while (labels.hasMoreElements()) {
        String label = (String) labels.nextElement();
        // "dummy" only exists because of weka's sparse data format problems; it has no model.
        if (!label.equalsIgnoreCase("dummy")) {
            try {
                models.put(label, loadModel(modelpath + File.separator + prefix + "_" + label + ".model"));
            } catch (Exception e) {
                System.err.println("classifier for class " + label + " could not be loaded, prediction aborted");
                System.exit(9);
            }
        }
    }

    for (int idx = 0; idx < testdata.numInstances(); idx++) {
        Instance current = testdata.instance(idx);
        int instId = (int) current.value(testdata.attribute("instanceId").index());
        current.setClassMissing();

        HashMap<String, Double> scores = new HashMap<String, Double>();
        for (String currentClass : models.keySet()) {
            double[] dist = models.get(currentClass).distributionForInstance(current);

            System.out.print("instance " + instId + " (" + currentClass + ") --> \n");

            // NOTE(review): presumably index 1 is the target class of each binary
            // model (after "dummy") - confirm against how the models were trained.
            scores.put(currentClass, dist[1]);
        }
        predictions.put(instId, scores);
    }

    return predictions;
}