Example usage for weka.classifiers Classifier classifyInstance

List of usage examples for weka.classifiers Classifier classifyInstance

Introduction

In this page you can find the example usage for weka.classifiers Classifier classifyInstance.

Prototype

public double classifyInstance(Instance instance) throws Exception;

Source Link

Document

Classifies the given test instance.

Usage

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Repeat the selection from the database removing duplicates, since they will
 * only increase the execution time. And run the tuples through the classifier
 * to populate the justifiedpartition column.
 * /*from w ww  .j a v  a 2  s.  c  o m*/
 * @param tableProcessed
 * @param classifier
 * @param wa
 * @throws SQLException
 * @throws Exception
 */
public void populateJustifiedColumn(String tableProcessed, Classifier classifier, ArrayList<String> attributes,
        Connection conn, int numbPart, Enumeration enumclassvalues) throws SQLException, Exception {
    if (true) {
        labelTest(tableProcessed, classifier, conn);
        return;
    }

    tableProcessed = removeQuotes(tableProcessed);

    // get from the DB the tuples content and their partitioning column
    String sqlstring = "SELECT distinct g.tupleid, ";
    for (String sc : attributes) {
        sqlstring += "s." + sc + ", ";
    }
    sqlstring += "g." + pcol + " FROM " + "(SELECT distinct tupleid," + pcol + " FROM `" + testingtable
            + "` WHERE tableid = '" + tableProcessed + "') AS g, relcloud_" + tableProcessed + " AS s "
            + "WHERE s.relcloud_id = g.tupleid;";

    System.out.println(sqlstring);
    Statement stmt = conn.createStatement();

    // initializing the testing table to avoid complaints from classifier with
    // an hash partition like distribution
    if (!testingtable.equals(sampledtrainingtable)) {
        int i = 0;

        Object o = enumclassvalues.nextElement();

        // set everything to an existing value to ensure that every field is
        // covered
        stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tableid = '"
                + tableProcessed + "'");
        // and than sparkly in a bunch of other values (unsure whether it is
        // required);
        while (enumclassvalues.hasMoreElements()) {
            o = enumclassvalues.nextElement();

            // FIXME there might still be an issue in which tupleid%i do not exists,
            // and thus one of the "o" never appears in the instance...
            stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tupleid%"
                    + numbPart + "=" + i + " AND tableid = '" + tableProcessed + "'");
            i++;
        }
    }

    ResultSet res = stmt.executeQuery(sqlstring);
    // create an instance from the resultset
    Instances data_tupleid = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile);
    res.close();

    data_tupleid.setClassIndex(data_tupleid.numAttributes() - 1);
    Instances data_no_tupleid = makeLastNominal(data_tupleid);
    data_no_tupleid.setClassIndex(data_no_tupleid.numAttributes() - 1);
    // remove tupleid from data_no_tupleid, still available in data_tupleid
    data_no_tupleid.deleteAttributeAt(0);

    // if(data_no_tupleid.classAttribute().numValues()>1){
    System.out.println("Running the tuples through the classifier to populate " + explainedPartitionCol);

    // use data that still has the tupleid and newData for the classification
    Enumeration enum_data_tupleid = data_tupleid.enumerateInstances();
    Enumeration enum_data_no_tupleid = data_no_tupleid.enumerateInstances();

    PreparedStatement updateJustCol = conn.prepareStatement("UPDATE `" + testingtable + "` SET `"
            + explainedPartitionCol + "` = ? " + "WHERE tableid = '" + tableProcessed + "' AND tupleid = ?;");

    while (enum_data_tupleid.hasMoreElements() && enum_data_no_tupleid.hasMoreElements()) {

        Instance tupIDinstance = (Instance) enum_data_tupleid.nextElement();
        Instance instance = (Instance) enum_data_no_tupleid.nextElement();

        double part = classifier.classifyInstance(instance);
        if (part == Instance.missingValue())
            System.err.println("No classification for:" + instance.toString());
        updateJustCol.setInt(1, (int) part);
        updateJustCol.setInt(2, (int) tupIDinstance.value(0));

        // System.out.println(tableProcessed+" "+ instance.value(0) + " " +
        // tupIDinstance.classValue() +" "+ part);

        updateJustCol.execute();
        updateJustCol.clearParameters();

    }

    updateJustCol.close();

}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * Evaluates the classifier on a single instance and records the
 * prediction (when the class attribute is nominal).
 *
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @return the prediction made by the classifier
 * @throws Exception if the model could not be evaluated
 * successfully or the data contains string attributes
 */
public double evaluateModelOnceAndRecordPrediction(Classifier classifier, Instance instance) throws Exception {

    // Work on a copy whose class value is hidden from the classifier.
    Instance hidden = (Instance) instance.copy();
    hidden.setDataset(instance.dataset());
    hidden.setClassMissing();

    // Numeric class: a plain point prediction is recorded.
    if (!m_ClassIsNominal) {
        double prediction = classifier.classifyInstance(hidden);
        updateStatsForPredictor(prediction, instance);
        return prediction;
    }

    // Nominal class: take the arg-max of the class distribution.
    if (m_Predictions == null) {
        m_Predictions = new FastVector();
    }
    double[] distribution = classifier.distributionForInstance(hidden);
    double prediction = Utils.maxIndex(distribution);
    if (distribution[(int) prediction] <= 0) {
        // The winning class has zero probability: treat as unclassified.
        prediction = Instance.missingValue();
    }
    updateStatsForClassifier(distribution, instance);
    m_Predictions.addElement(new NominalPrediction(instance.classValue(), distribution, instance.weight()));
    return prediction;
}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * store the prediction made by the classifier as a string
 * /*  w ww. ja va  2 s  .c  o m*/
 * @param classifier      the classifier to use
 * @param inst      the instance to generate text from
 * @param instNum      the index in the dataset
 * @param attributesToOutput   the indices of the attributes to output
 * @param printDistribution   prints the complete distribution for nominal 
 *             classes, not just the predicted value
 * @return                    the prediction as a String
 * @throws Exception      if something goes wrong
 * @see         #printClassifications(Classifier, Instances, String, int, Range, boolean)
 */
protected static String predictionText(Classifier classifier, Instance inst, int instNum,
        Range attributesToOutput, boolean printDistribution)

        throws Exception {

    StringBuffer result = new StringBuffer();
    int width = 10;
    int prec = 3;

    Instance withMissing = (Instance) inst.copy();
    withMissing.setDataset(inst.dataset());
    withMissing.setMissing(withMissing.classIndex());
    double predValue = classifier.classifyInstance(withMissing);

    // index
    result.append(Utils.padLeft("" + (instNum + 1), 6));

    if (inst.dataset().classAttribute().isNumeric()) {
        // actual
        if (inst.classIsMissing())
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(inst.classValue(), width, prec));
        // predicted
        if (Instance.isMissingValue(predValue))
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(predValue, width, prec));
        // error
        if (Instance.isMissingValue(predValue) || inst.classIsMissing())
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(predValue - inst.classValue(), width, prec));
    } else {
        // actual
        result.append(" "
                + Utils.padLeft(((int) inst.classValue() + 1) + ":" + inst.toString(inst.classIndex()), width));
        // predicted
        if (Instance.isMissingValue(predValue))
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.padLeft(
                    ((int) predValue + 1) + ":" + inst.dataset().classAttribute().value((int) predValue),
                    width));
        // error?
        if (!Instance.isMissingValue(predValue) && !inst.classIsMissing()
                && ((int) predValue + 1 != (int) inst.classValue() + 1))
            result.append(" " + "  +  ");
        else
            result.append(" " + "     ");
        // prediction/distribution
        if (printDistribution) {
            if (Instance.isMissingValue(predValue)) {
                result.append(" " + "?");
            } else {
                result.append(" ");
                double[] dist = classifier.distributionForInstance(withMissing);
                for (int n = 0; n < dist.length; n++) {
                    if (n > 0)
                        result.append(",");
                    if (n == (int) predValue)
                        result.append("*");
                    result.append(Utils.doubleToString(dist[n], prec));
                }
            }
        } else {
            if (Instance.isMissingValue(predValue))
                result.append(" " + "?");
            else
                result.append(" " + Utils.doubleToString(
                        classifier.distributionForInstance(withMissing)[(int) predValue], prec));
        }
    }

    // attributes
    result.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");

    return result.toString();
}

From source file:de.fub.maps.project.detector.model.inference.processhandler.InferenceDataProcessHandler.java

License:Open Source License

/**
 * Classifies every segment of the inference data set and groups the
 * segments in the model's result map by predicted class label.
 */
@Override
protected void handle() {
    // Discard any results left over from a previous run.
    clearResults();

    Classifier classifier = getInferenceModel().getClassifier();
    HashSet<TrackSegment> inferenceDataSet = getInferenceDataSet();
    Collection<Attribute> attributes = getInferenceModel().getAttributes();

    if (attributes.isEmpty()) {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list lengeth == {0}", attributes.size()));
    }

    setClassesToView(getInferenceModel().getInput().getTrainingsSet().keySet());

    // Empty dataset sharing the model's attributes; class attribute at index 0.
    Instances unlabeled = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributes), 0); //NO18N
    unlabeled.setClassIndex(0);

    // segments keeps instance order, so the i-th instance maps to the i-th segment.
    ArrayList<TrackSegment> segments = new ArrayList<TrackSegment>();
    for (TrackSegment segment : inferenceDataSet) {
        unlabeled.add(getInstance(segment));
        segments.add(segment);
    }

    // Classify a copy so the unlabeled set stays untouched.
    Instances labeled = new Instances(unlabeled);

    for (int i = 0; i < labeled.numInstances(); i++) {
        try {
            Instance instance = labeled.instance(i);

            double predicted = classifier.classifyInstance(instance);
            instance.setClassValue(predicted);

            String label = unlabeled.classAttribute().value((int) predicted);

            if (i < segments.size()) {
                instanceToTrackSegmentMap.put(instance, segments.get(i));
            }

            put(label, instance);

        } catch (Exception ex) {
            // Best-effort: log and continue with the remaining instances.
            Exceptions.printStackTrace(ex);
        }
    }

    updateVisualRepresentation();

    // Copy the grouped segments into the inference model's result set,
    // skipping classes that ended up empty.
    for (Entry<String, List<Instance>> entry : resultMap.entrySet()) {
        HashSet<TrackSegment> grouped = new HashSet<TrackSegment>();
        for (Instance instance : entry.getValue()) {
            TrackSegment segment = instanceToTrackSegmentMap.get(instance);
            if (segment != null) {
                grouped.add(segment);
            }
        }
        if (!grouped.isEmpty()) {
            getInferenceModel().getResult().put(entry.getKey(), grouped);
        }
    }

    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}

From source file:de.fub.maps.project.detector.model.inference.processhandler.SpecialInferenceDataProcessHandler.java

License:Open Source License

/**
 * Classifies every track segment of the trainings set with the model's
 * classifier and groups the segments by predicted class label.
 * NOTE(review): segments are relabeled with their training-set key before
 * classification, so this evaluates the classifier on its own training data.
 */
@Override
protected void handle() {
    // Discard any results left over from a previous run.
    clearResults();

    Classifier classifier = getInferenceModel().getClassifier();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();

    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        // Empty dataset sharing the model's attributes; class attribute is index 0.
        Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList),
                0); //NO18N
        unlabeledInstances.setClassIndex(0);

        // Flatten the trainings set; segmentList keeps instance order so the
        // i-th instance can be mapped back to the i-th segment below.
        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (Entry<String, HashSet<TrackSegment>> entry : getInferenceModel().getInput().getTrainingsSet()
                .entrySet()) {
            for (TrackSegment segment : entry.getValue()) {
                segment.setLabel(entry.getKey());
                Instance instance = getInstance(segment);
                unlabeledInstances.add(instance);
                segmentList.add(segment);
            }
        }

        // create copy, so the unlabeled set stays untouched
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);

                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);

                // get class label for the predicted (index-valued) class
                String value = unlabeledInstances.classAttribute().value((int) classifyed);

                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }

                // put label and instance to result map
                put(value, instance);

            } catch (Exception ex) {
                // best-effort: log and continue with the remaining instances
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // update result set of the inferenceModel
        for (Map.Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }

            // only those classes are put into  the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        // no attributes means no usable feature space; abort loudly
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list lengeth == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}

From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java

License:Apache License

/**
 *
 * This method applies a serialized WEKA model file to an unlabeld .arff file for classification
 *
 *
 * @param input_arff the annotated gold standard in an .arff file
 * @param model output file for the model
 * @param output output file for evaluation of trained classifier (10-fold cross validation)
 * @throws Exception/*  w ww  .j  a  v a2s .  c o  m*/
 */

public static void applyModelToUnlabeledArff(String input_arff, String model, String output) throws Exception {
    DataSource source = new DataSource(input_arff);
    Instances unlabeled = source.getDataSet();
    if (unlabeled.classIndex() == -1) {
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove ID  attribute

    ObjectInputStream ois = new ObjectInputStream(new FileInputStream(model));
    Classifier cls = (Classifier) ois.readObject();
    ois.close();
    // create copy
    Instances labeled = new Instances(unlabeled);

    // label instances
    for (int i = 0; i < unlabeled.numInstances(); i++) {
        double clsLabel = cls.classifyInstance(unlabeled.instance(i));
        labeled.instance(i).setClassValue(clsLabel);
    }
    // save labeled data
    BufferedWriter writer = new BufferedWriter(new FileWriter(output));
    writer.write(labeled.toString());
    writer.newLine();
    writer.flush();
    writer.close();

}

From source file:de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java

License:Apache License

/**
 * In test mode: classifies every confusion-set token of the CAS and flags a
 * spelling anomaly where the prediction differs from the token. In training
 * mode: writes the extracted features for each confusion-set token.
 * NOTE(review): in test mode the classifier is rebuilt from the training
 * .arff on every process() call, which is costly for multi-document runs.
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    Collection<Token> tokens = JCasUtil.select(jcas, Token.class);

    if (isTest) {

        Instances trainData = null;
        Classifier cl = null;
        try {
            trainData = getInstances(trainingArff);

            cl = getClassifier();

            //                SpreadSubsample spread = new SpreadSubsample();
            //                spread.setDistributionSpread(1.0);
            //                    
            //                FilteredClassifier fc = new FilteredClassifier();
            //                fc.setFilter(spread);
            //                fc.setClassifier(cl);

            cl.buildClassifier(trainData);
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }

        for (Token token : tokens) {
            String tokenString = token.getCoveredText();
            // only tokens from the confusion set are candidates for checking
            if (tokenString.length() > 0 && confusionSet.contains(tokenString)) {
                Instance<String> instance = new Instance<String>();
                for (SimpleFeatureExtractor featExt : featureExtractors) {
                    instance.addAll(featExt.extract(jcas, token));
                }

                instance.setOutcome(tokenString);

                // collect the nominal class labels in attribute order
                List<String> classValues = new ArrayList<String>();
                for (Enumeration e = trainData.classAttribute().enumerateValues(); e.hasMoreElements();) {
                    classValues.add(e.nextElement().toString());
                }

                // build classifier from training arff and classify
                try {
                    weka.core.Instance wekaInstance = CleartkInstanceConverter.toWekaInstance(instance,
                            classValues);
                    System.out.println(wekaInstance);
                    double prediction = cl.classifyInstance(wekaInstance);

                    // prediction is the index in the class labels, not the class label itself!
                    String outcome = trainData.classAttribute().value(new Double(prediction).intValue());

                    // flag a spelling anomaly when the prediction disagrees with the token
                    if (!tokenString.equals(outcome)) {
                        SpellingAnomaly ann = new SpellingAnomaly(jcas, token.getBegin(), token.getEnd());
                        ann.setCategory(errorClass);
                        ann.setSuggestions(SpellingUtils.getSuggestedActionArray(jcas, outcome));
                        ann.addToIndexes();
                    }
                } catch (Exception e) {
                    throw new AnalysisEngineProcessException(e);
                }
            }
        }
    } else {
        // training mode: emit one feature instance per confusion-set token
        for (Token token : tokens) {
            String tokenString = token.getCoveredText();
            if (tokenString.length() > 0 && confusionSet.contains(tokenString)) {
                Instance<String> instance = new Instance<String>();
                for (SimpleFeatureExtractor featExt : featureExtractors) {
                    instance.addAll(featExt.extract(jcas, token));
                }

                instance.setOutcome(tokenString);

                // we also need to add a negative example
                // choose it randomly from the confusion set without the actual token
                // TODO implement negative examples

                this.dataWriter.write(instance);
            }
        }
    }
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java

License:Open Source License

/**
 * Builds a cluster contingency table comparing the classifier's predicted
 * clusters (U) with the true class labels (V) over the test instances.
 */
public static ClusterContingencyTable evaluateClassifier(final Classifier classifier, final Instances test) {
    try {
        // predicted cluster -> feature vectors, and true label -> feature vectors
        final Map<Integer, Set<RealVector>> predicted = new TreeMap<Integer, Set<RealVector>>();
        final Map<Integer, Set<RealVector>> actual = new TreeMap<Integer, Set<RealVector>>();

        // filter that strips the class attribute, leaving only the features
        final Remove stripClass = new Remove();
        stripClass.setAttributeIndicesArray(new int[] { test.classIndex() });
        stripClass.setInputFormat(test);

        for (final Instance inst : test) {
            stripClass.input(inst);
            final double[] features = stripClass.output().toDoubleArray();

            // group by predicted cluster
            final int cluster = (int) classifier.classifyInstance(inst);
            Set<RealVector> clusterBucket = predicted.get(cluster);
            if (clusterBucket == null) {
                clusterBucket = new HashSet<RealVector>();
                predicted.put(cluster, clusterBucket);
            }
            clusterBucket.add(new ArrayRealVector(features));

            // group by true class label
            final int label = (int) inst.classValue();
            Set<RealVector> labelBucket = actual.get(label);
            if (labelBucket == null) {
                labelBucket = new HashSet<RealVector>();
                actual.put(label, labelBucket);
            }
            labelBucket.add(new ArrayRealVector(features));
        }

        // TreeMap iteration order is key order, matching the original entry loops
        final ArrayList<Set<RealVector>> U = new ArrayList<Set<RealVector>>(predicted.values());
        final ArrayList<Set<RealVector>> V = new ArrayList<Set<RealVector>>(actual.values());
        return new ClusterContingencyTable(U, V);
    } catch (final RuntimeException ex) {
        throw ex;
    } catch (final Exception ex) {
        throw new RuntimeException(ex);
    }
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Classify a feature vector that is not part of an Instances object.
 *
 * @param classifier the classifier to apply
 * @param attributes attribute definitions describing the feature vector
 * @param features the raw feature values
 * @return the predicted class value
 */
public static double classify(final Classifier classifier, final List<Attribute> attributes,
        final double[] features) {
    // Wrap the bare vector in a one-instance dataset the classifier can consume.
    final Instances singleton = createSingletonInstances(attributes, features);
    try {
        return classifier.classifyInstance(singleton.get(0));
    } catch (final Exception ex) {
        // surface checked classifier failures as unchecked, preserving the cause
        throw new RuntimeException(ex);
    }
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/** Uses the given model to predict the classes of the data.
 * @param model/*from   w ww . ja  va  2  s.  com*/
 * @param data
 * @return An array of the class predictions.
 */
public static int[] predictClasses(Classifier model, Instances data) {
    int[] predictions = new int[data.numInstances()];
    int numInstances = data.numInstances();
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
        try {
            predictions[instIdx] = (int) model.classifyInstance(data.instance(instIdx));
        } catch (Exception e) {
            predictions[instIdx] = -1;
        }
    }
    return predictions;
}