List of usage examples for weka.classifiers Classifier classifyInstance
public double classifyInstance(Instance instance) throws Exception;
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/** * Repeat the selection from the database removing duplicates, since they will * only increase the execution time. And run the tuples through the classifier * to populate the justifiedpartition column. * /*from w ww .j a v a 2 s. c o m*/ * @param tableProcessed * @param classifier * @param wa * @throws SQLException * @throws Exception */ public void populateJustifiedColumn(String tableProcessed, Classifier classifier, ArrayList<String> attributes, Connection conn, int numbPart, Enumeration enumclassvalues) throws SQLException, Exception { if (true) { labelTest(tableProcessed, classifier, conn); return; } tableProcessed = removeQuotes(tableProcessed); // get from the DB the tuples content and their partitioning column String sqlstring = "SELECT distinct g.tupleid, "; for (String sc : attributes) { sqlstring += "s." + sc + ", "; } sqlstring += "g." + pcol + " FROM " + "(SELECT distinct tupleid," + pcol + " FROM `" + testingtable + "` WHERE tableid = '" + tableProcessed + "') AS g, relcloud_" + tableProcessed + " AS s " + "WHERE s.relcloud_id = g.tupleid;"; System.out.println(sqlstring); Statement stmt = conn.createStatement(); // initializing the testing table to avoid complaints from classifier with // an hash partition like distribution if (!testingtable.equals(sampledtrainingtable)) { int i = 0; Object o = enumclassvalues.nextElement(); // set everything to an existing value to ensure that every field is // covered stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tableid = '" + tableProcessed + "'"); // and than sparkly in a bunch of other values (unsure whether it is // required); while (enumclassvalues.hasMoreElements()) { o = enumclassvalues.nextElement(); // FIXME there might still be an issue in which tupleid%i do not exists, // and thus one of the "o" never appears in the instance... 
stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tupleid%" + numbPart + "=" + i + " AND tableid = '" + tableProcessed + "'"); i++; } } ResultSet res = stmt.executeQuery(sqlstring); // create an instance from the resultset Instances data_tupleid = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile); res.close(); data_tupleid.setClassIndex(data_tupleid.numAttributes() - 1); Instances data_no_tupleid = makeLastNominal(data_tupleid); data_no_tupleid.setClassIndex(data_no_tupleid.numAttributes() - 1); // remove tupleid from data_no_tupleid, still available in data_tupleid data_no_tupleid.deleteAttributeAt(0); // if(data_no_tupleid.classAttribute().numValues()>1){ System.out.println("Running the tuples through the classifier to populate " + explainedPartitionCol); // use data that still has the tupleid and newData for the classification Enumeration enum_data_tupleid = data_tupleid.enumerateInstances(); Enumeration enum_data_no_tupleid = data_no_tupleid.enumerateInstances(); PreparedStatement updateJustCol = conn.prepareStatement("UPDATE `" + testingtable + "` SET `" + explainedPartitionCol + "` = ? " + "WHERE tableid = '" + tableProcessed + "' AND tupleid = ?;"); while (enum_data_tupleid.hasMoreElements() && enum_data_no_tupleid.hasMoreElements()) { Instance tupIDinstance = (Instance) enum_data_tupleid.nextElement(); Instance instance = (Instance) enum_data_no_tupleid.nextElement(); double part = classifier.classifyInstance(instance); if (part == Instance.missingValue()) System.err.println("No classification for:" + instance.toString()); updateJustCol.setInt(1, (int) part); updateJustCol.setInt(2, (int) tupIDinstance.value(0)); // System.out.println(tableProcessed+" "+ instance.value(0) + " " + // tupIDinstance.classValue() +" "+ part); updateJustCol.execute(); updateJustCol.clearParameters(); } updateJustCol.close(); }
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/** * Evaluates the classifier on a single instance and records the * prediction (if the class is nominal). * * @param classifier machine learning classifier * @param instance the test instance to be classified * @return the prediction made by the clasifier * @throws Exception if model could not be evaluated * successfully or the data contains string attributes */// ww w.j a v a2s . com public double evaluateModelOnceAndRecordPrediction(Classifier classifier, Instance instance) throws Exception { Instance classMissing = (Instance) instance.copy(); double pred = 0; classMissing.setDataset(instance.dataset()); classMissing.setClassMissing(); if (m_ClassIsNominal) { if (m_Predictions == null) { m_Predictions = new FastVector(); } double[] dist = classifier.distributionForInstance(classMissing); pred = Utils.maxIndex(dist); if (dist[(int) pred] <= 0) { pred = Instance.missingValue(); } updateStatsForClassifier(dist, instance); m_Predictions.addElement(new NominalPrediction(instance.classValue(), dist, instance.weight())); } else { pred = classifier.classifyInstance(classMissing); updateStatsForPredictor(pred, instance); } return pred; }
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/**
 * Stores the prediction made by the classifier as a string.
 *
 * @param classifier the classifier to use
 * @param inst the instance to generate text from
 * @param instNum the index in the dataset
 * @param attributesToOutput the indices of the attributes to output
 * @param printDistribution prints the complete distribution for nominal
 *          classes, not just the predicted value
 * @return the prediction as a String
 * @throws Exception if something goes wrong
 * @see #printClassifications(Classifier, Instances, String, int, Range, boolean)
 */
protected static String predictionText(Classifier classifier, Instance inst, int instNum,
        Range attributesToOutput, boolean printDistribution) throws Exception {
    StringBuffer result = new StringBuffer();
    // Fixed column width and decimal precision for the tabular output.
    int width = 10;
    int prec = 3;

    // Classify a copy with the class value hidden so the true label cannot leak.
    Instance withMissing = (Instance) inst.copy();
    withMissing.setDataset(inst.dataset());
    withMissing.setMissing(withMissing.classIndex());
    double predValue = classifier.classifyInstance(withMissing);

    // index (1-based row number)
    result.append(Utils.padLeft("" + (instNum + 1), 6));

    if (inst.dataset().classAttribute().isNumeric()) {
        // actual
        if (inst.classIsMissing())
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(inst.classValue(), width, prec));
        // predicted
        if (Instance.isMissingValue(predValue))
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(predValue, width, prec));
        // error
        if (Instance.isMissingValue(predValue) || inst.classIsMissing())
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(predValue - inst.classValue(), width, prec));
    } else {
        // actual, rendered as "index:label" with a 1-based class index
        result.append(" "
                + Utils.padLeft(((int) inst.classValue() + 1) + ":" + inst.toString(inst.classIndex()), width));
        // predicted
        if (Instance.isMissingValue(predValue))
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.padLeft(
                    ((int) predValue + 1) + ":" + inst.dataset().classAttribute().value((int) predValue), width));
        // error? ("+" marks a misclassification)
        if (!Instance.isMissingValue(predValue) && !inst.classIsMissing()
                && ((int) predValue + 1 != (int) inst.classValue() + 1))
            result.append(" " + " + ");
        else
            result.append(" " + " ");
        // prediction/distribution
        if (printDistribution) {
            if (Instance.isMissingValue(predValue)) {
                result.append(" " + "?");
            } else {
                result.append(" ");
                // Print the full class distribution, starring the predicted class.
                double[] dist = classifier.distributionForInstance(withMissing);
                for (int n = 0; n < dist.length; n++) {
                    if (n > 0)
                        result.append(",");
                    if (n == (int) predValue)
                        result.append("*");
                    result.append(Utils.doubleToString(dist[n], prec));
                }
            }
        } else {
            // Print only the probability assigned to the predicted class.
            if (Instance.isMissingValue(predValue))
                result.append(" " + "?");
            else
                result.append(" " + Utils.doubleToString(
                        classifier.distributionForInstance(withMissing)[(int) predValue], prec));
        }
    }

    // attributes
    result.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
    return result.toString();
}
From source file:de.fub.maps.project.detector.model.inference.processhandler.InferenceDataProcessHandler.java
License:Open Source License
/**
 * Classifies every inference track segment with the model's classifier and
 * groups the labeled instances (and their originating track segments) into
 * the inference model's result set.
 */
@Override
protected void handle() {
    clearResults();
    Classifier classifier = getInferenceModel().getClassifier();
    HashSet<TrackSegment> inferenceDataSet = getInferenceDataSet();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        // Build an empty data set over the model's attributes; the class
        // attribute is at index 0.
        Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList),
                0); //NO18N
        unlabeledInstances.setClassIndex(0);

        // segmentList mirrors the instance order so instance and segment can
        // be re-associated by index after classification.
        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (TrackSegment segment : inferenceDataSet) {
            Instance instance = getInstance(segment);
            unlabeledInstances.add(instance);
            segmentList.add(segment);
        }

        // create copy
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);
                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);
                // get class label for the predicted class index
                String value = unlabeledInstances.classAttribute().value((int) classifyed);
                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }
                // put label and instance to result map
                put(value, instance);
            } catch (Exception ex) {
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // Update the result set of the inference model: map each class label
        // back to the set of track segments whose instances received it.
        for (Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }
            // only those classes are put into the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list lengeth == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}
From source file:de.fub.maps.project.detector.model.inference.processhandler.SpecialInferenceDataProcessHandler.java
License:Open Source License
/**
 * Re-classifies the labeled training segments with the model's classifier and
 * groups the resulting instances (and their originating track segments) into
 * the inference model's result set.
 */
@Override
protected void handle() {
    clearResults();
    Classifier classifier = getInferenceModel().getClassifier();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        // Build an empty data set over the model's attributes; the class
        // attribute is at index 0.
        Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList),
                0); //NO18N
        unlabeledInstances.setClassIndex(0);

        // segmentList mirrors the instance order so instance and segment can
        // be re-associated by index after classification. Each segment is
        // labeled with its training-set key before feature extraction.
        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (Entry<String, HashSet<TrackSegment>> entry : getInferenceModel().getInput().getTrainingsSet()
                .entrySet()) {
            for (TrackSegment segment : entry.getValue()) {
                segment.setLabel(entry.getKey());
                Instance instance = getInstance(segment);
                unlabeledInstances.add(instance);
                segmentList.add(segment);
            }
        }

        // create copy
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);
                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);
                // get class label for the predicted class index
                String value = unlabeledInstances.classAttribute().value((int) classifyed);
                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }
                // put label and instance to result map
                put(value, instance);
            } catch (Exception ex) {
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // Update the result set of the inference model: map each class label
        // back to the set of track segments whose instances received it.
        for (Map.Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }
            // only those classes are put into the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list lengeth == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}
From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java
License:Apache License
/** * * This method applies a serialized WEKA model file to an unlabeld .arff file for classification * * * @param input_arff the annotated gold standard in an .arff file * @param model output file for the model * @param output output file for evaluation of trained classifier (10-fold cross validation) * @throws Exception/* w ww .j a v a2s . c o m*/ */ public static void applyModelToUnlabeledArff(String input_arff, String model, String output) throws Exception { DataSource source = new DataSource(input_arff); Instances unlabeled = source.getDataSet(); if (unlabeled.classIndex() == -1) { unlabeled.setClassIndex(unlabeled.numAttributes() - 1); } Remove rm = new Remove(); rm.setAttributeIndices("1"); // remove ID attribute ObjectInputStream ois = new ObjectInputStream(new FileInputStream(model)); Classifier cls = (Classifier) ois.readObject(); ois.close(); // create copy Instances labeled = new Instances(unlabeled); // label instances for (int i = 0; i < unlabeled.numInstances(); i++) { double clsLabel = cls.classifyInstance(unlabeled.instance(i)); labeled.instance(i).setClassValue(clsLabel); } // save labeled data BufferedWriter writer = new BufferedWriter(new FileWriter(output)); writer.write(labeled.toString()); writer.newLine(); writer.flush(); writer.close(); }
From source file:de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java
License:Apache License
/**
 * In test mode: trains a classifier from the training arff, classifies every
 * confusion-set token in the CAS, and annotates a SpellingAnomaly wherever
 * the prediction differs from the actual token. In training mode: extracts
 * features for each confusion-set token and writes them as training data.
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    Collection<Token> tokens = JCasUtil.select(jcas, Token.class);

    if (isTest) {
        Instances trainData = null;
        Classifier cl = null;
        try {
            trainData = getInstances(trainingArff);
            cl = getClassifier();

            //                SpreadSubsample spread = new SpreadSubsample();
            //                spread.setDistributionSpread(1.0);
            //
            //                FilteredClassifier fc = new FilteredClassifier();
            //                fc.setFilter(spread);
            //                fc.setClassifier(cl);

            cl.buildClassifier(trainData);
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }

        for (Token token : tokens) {
            String tokenString = token.getCoveredText();
            if (tokenString.length() > 0 && confusionSet.contains(tokenString)) {
                // Extract features for this token; the outcome is the token itself.
                Instance<String> instance = new Instance<String>();
                for (SimpleFeatureExtractor featExt : featureExtractors) {
                    instance.addAll(featExt.extract(jcas, token));
                }
                instance.setOutcome(tokenString);

                // Collect the class labels in the order the classifier knows them.
                List<String> classValues = new ArrayList<String>();
                for (Enumeration e = trainData.classAttribute().enumerateValues(); e.hasMoreElements();) {
                    classValues.add(e.nextElement().toString());
                }

                // build classifier from training arff and classify
                try {
                    weka.core.Instance wekaInstance = CleartkInstanceConverter.toWekaInstance(instance,
                            classValues);
                    System.out.println(wekaInstance);
                    double prediction = cl.classifyInstance(wekaInstance);

                    // prediction is the index in the class labels, not the class label itself!
                    String outcome = trainData.classAttribute().value(new Double(prediction).intValue());

                    // Annotate only when the predicted label differs from the token.
                    if (!tokenString.equals(outcome)) {
                        SpellingAnomaly ann = new SpellingAnomaly(jcas, token.getBegin(), token.getEnd());
                        ann.setCategory(errorClass);
                        ann.setSuggestions(SpellingUtils.getSuggestedActionArray(jcas, outcome));
                        ann.addToIndexes();
                    }
                } catch (Exception e) {
                    throw new AnalysisEngineProcessException(e);
                }
            }
        }
    } else {
        for (Token token : tokens) {
            String tokenString = token.getCoveredText();
            if (tokenString.length() > 0 && confusionSet.contains(tokenString)) {
                Instance<String> instance = new Instance<String>();
                for (SimpleFeatureExtractor featExt : featureExtractors) {
                    instance.addAll(featExt.extract(jcas, token));
                }
                instance.setOutcome(tokenString);

                // we also need to add a negative example
                // choose it randomly from the confusion set without the actual token
                // TODO implement negative examples

                this.dataWriter.write(instance);
            }
        }
    }
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
/**
 * Evaluates a classifier as a clustering: each instance's unlabeled feature
 * vector is grouped once by the classifier's predicted class (U) and once by
 * its true class (V), and the two groupings are compared via a cluster
 * contingency table.
 *
 * @param classifier classifier whose predictions form the U clustering
 * @param test       labeled test instances
 * @return contingency table between predicted and true groupings
 */
public static ClusterContingencyTable evaluateClassifier(final Classifier classifier, final Instances test) {
    try {
        final Map<Integer, Set<RealVector>> Umap = new TreeMap<Integer, Set<RealVector>>();
        final Map<Integer, Set<RealVector>> Vmap = new TreeMap<Integer, Set<RealVector>>();

        // Filter that strips the class attribute, leaving only features.
        final Remove rm_filter = new Remove();
        rm_filter.setAttributeIndicesArray(new int[] { test.classIndex() });
        rm_filter.setInputFormat(test);

        for (final Instance i : test) {
            // Push the instance through the filter to get its unlabeled
            // feature vector (input/output must be called in this order).
            rm_filter.input(i);
            final double[] phi = rm_filter.output().toDoubleArray();
            //            final double[] phi = WekaUtil.unlabeledFeatures( i );

            // Group by predicted class.
            final int cluster = (int) classifier.classifyInstance(i);
            Set<RealVector> u = Umap.get(cluster);
            if (u == null) {
                u = new HashSet<RealVector>();
                Umap.put(cluster, u);
            }
            u.add(new ArrayRealVector(phi));

            // Group by true class.
            final int true_label = (int) i.classValue();
            Set<RealVector> v = Vmap.get(true_label);
            if (v == null) {
                v = new HashSet<RealVector>();
                Vmap.put(true_label, v);
            }
            v.add(new ArrayRealVector(phi));
        }

        // Flatten the (key-sorted) maps into cluster lists.
        final ArrayList<Set<RealVector>> U = new ArrayList<Set<RealVector>>();
        for (final Map.Entry<Integer, Set<RealVector>> e : Umap.entrySet()) {
            U.add(e.getValue());
        }
        final ArrayList<Set<RealVector>> V = new ArrayList<Set<RealVector>>();
        for (final Map.Entry<Integer, Set<RealVector>> e : Vmap.entrySet()) {
            V.add(e.getValue());
        }

        return new ClusterContingencyTable(U, V);
    } catch (final RuntimeException ex) {
        throw ex;
    } catch (final Exception ex) {
        // Wrap checked exceptions from WEKA so callers see an unchecked type.
        throw new RuntimeException(ex);
    }
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/**
 * Classifies a feature vector that is not part of an Instances object.
 *
 * @param classifier the classifier to apply
 * @param attributes the attribute schema describing the features
 * @param features   the raw feature values
 * @return the predicted class value
 */
public static double classify(final Classifier classifier, final List<Attribute> attributes,
        final double[] features) {
    // Wrap the raw vector in a one-element data set so WEKA can consume it.
    final Instances singleton = createSingletonInstances(attributes, features);
    final Instance wrapped = singleton.get(0);
    try {
        return classifier.classifyInstance(wrapped);
    } catch (final Exception ex) {
        // Checked exceptions from WEKA are not actionable here; rethrow unchecked.
        throw new RuntimeException(ex);
    }
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Uses the given model to predict the classes of the data. * @param model/*from w ww . ja va 2 s. com*/ * @param data * @return An array of the class predictions. */ public static int[] predictClasses(Classifier model, Instances data) { int[] predictions = new int[data.numInstances()]; int numInstances = data.numInstances(); for (int instIdx = 0; instIdx < numInstances; instIdx++) { try { predictions[instIdx] = (int) model.classifyInstance(data.instance(instIdx)); } catch (Exception e) { predictions[instIdx] = -1; } } return predictions; }