List of usage examples for weka.classifiers Classifier classifyInstance
public double classifyInstance(Instance instance) throws Exception;
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/** * Repeat the selection from the database removing duplicates, since they will * only increase the execution time. And run the tuples through the classifier * to populate the justifiedpartition column. * /*from w ww .j a v a 2 s. c o m*/ * @param tableProcessed * @param classifier * @param wa * @throws SQLException * @throws Exception */ public void populateJustifiedColumn(String tableProcessed, Classifier classifier, ArrayList<String> attributes, Connection conn, int numbPart, Enumeration enumclassvalues) throws SQLException, Exception { if (true) { labelTest(tableProcessed, classifier, conn); return; } tableProcessed = removeQuotes(tableProcessed); // get from the DB the tuples content and their partitioning column String sqlstring = "SELECT distinct g.tupleid, "; for (String sc : attributes) { sqlstring += "s." + sc + ", "; } sqlstring += "g." + pcol + " FROM " + "(SELECT distinct tupleid," + pcol + " FROM `" + testingtable + "` WHERE tableid = '" + tableProcessed + "') AS g, relcloud_" + tableProcessed + " AS s " + "WHERE s.relcloud_id = g.tupleid;"; System.out.println(sqlstring); Statement stmt = conn.createStatement(); // initializing the testing table to avoid complaints from classifier with // an hash partition like distribution if (!testingtable.equals(sampledtrainingtable)) { int i = 0; Object o = enumclassvalues.nextElement(); // set everything to an existing value to ensure that every field is // covered stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tableid = '" + tableProcessed + "'"); // and than sparkly in a bunch of other values (unsure whether it is // required); while (enumclassvalues.hasMoreElements()) { o = enumclassvalues.nextElement(); // FIXME there might still be an issue in which tupleid%i do not exists, // and thus one of the "o" never appears in the instance... 
stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tupleid%" + numbPart + "=" + i + " AND tableid = '" + tableProcessed + "'"); i++; } } ResultSet res = stmt.executeQuery(sqlstring); // create an instance from the resultset Instances data_tupleid = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile); res.close(); data_tupleid.setClassIndex(data_tupleid.numAttributes() - 1); Instances data_no_tupleid = makeLastNominal(data_tupleid); data_no_tupleid.setClassIndex(data_no_tupleid.numAttributes() - 1); // remove tupleid from data_no_tupleid, still available in data_tupleid data_no_tupleid.deleteAttributeAt(0); // if(data_no_tupleid.classAttribute().numValues()>1){ System.out.println("Running the tuples through the classifier to populate " + explainedPartitionCol); // use data that still has the tupleid and newData for the classification Enumeration enum_data_tupleid = data_tupleid.enumerateInstances(); Enumeration enum_data_no_tupleid = data_no_tupleid.enumerateInstances(); PreparedStatement updateJustCol = conn.prepareStatement("UPDATE `" + testingtable + "` SET `" + explainedPartitionCol + "` = ? " + "WHERE tableid = '" + tableProcessed + "' AND tupleid = ?;"); while (enum_data_tupleid.hasMoreElements() && enum_data_no_tupleid.hasMoreElements()) { Instance tupIDinstance = (Instance) enum_data_tupleid.nextElement(); Instance instance = (Instance) enum_data_no_tupleid.nextElement(); double part = classifier.classifyInstance(instance); if (part == Instance.missingValue()) System.err.println("No classification for:" + instance.toString()); updateJustCol.setInt(1, (int) part); updateJustCol.setInt(2, (int) tupIDinstance.value(0)); // System.out.println(tableProcessed+" "+ instance.value(0) + " " + // tupIDinstance.classValue() +" "+ part); updateJustCol.execute(); updateJustCol.clearParameters(); } updateJustCol.close(); }
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/** * Evaluates the classifier on a single instance and records the * prediction (if the class is nominal). * * @param classifier machine learning classifier * @param instance the test instance to be classified * @return the prediction made by the clasifier * @throws Exception if model could not be evaluated * successfully or the data contains string attributes */// ww w.j a v a2s . com public double evaluateModelOnceAndRecordPrediction(Classifier classifier, Instance instance) throws Exception { Instance classMissing = (Instance) instance.copy(); double pred = 0; classMissing.setDataset(instance.dataset()); classMissing.setClassMissing(); if (m_ClassIsNominal) { if (m_Predictions == null) { m_Predictions = new FastVector(); } double[] dist = classifier.distributionForInstance(classMissing); pred = Utils.maxIndex(dist); if (dist[(int) pred] <= 0) { pred = Instance.missingValue(); } updateStatsForClassifier(dist, instance); m_Predictions.addElement(new NominalPrediction(instance.classValue(), dist, instance.weight())); } else { pred = classifier.classifyInstance(classMissing); updateStatsForPredictor(pred, instance); } return pred; }
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/**
 * Stores the prediction made by the classifier as a string.
 *
 * @param classifier the classifier to use
 * @param inst the instance to generate text from
 * @param instNum the index in the dataset
 * @param attributesToOutput the indices of the attributes to output
 * @param printDistribution prints the complete distribution for nominal
 *          classes, not just the predicted value
 * @return the prediction as a String
 * @throws Exception if something goes wrong
 * @see #printClassifications(Classifier, Instances, String, int, Range, boolean)
 */
protected static String predictionText(Classifier classifier, Instance inst, int instNum,
        Range attributesToOutput, boolean printDistribution) throws Exception {
    StringBuffer result = new StringBuffer();
    // Fixed column width and decimal precision for the tabular output.
    int width = 10;
    int prec = 3;

    // Classify a copy with the class value hidden so the true label cannot leak.
    Instance withMissing = (Instance) inst.copy();
    withMissing.setDataset(inst.dataset());
    withMissing.setMissing(withMissing.classIndex());
    double predValue = classifier.classifyInstance(withMissing);

    // index (1-based row number)
    result.append(Utils.padLeft("" + (instNum + 1), 6));

    if (inst.dataset().classAttribute().isNumeric()) {
        // actual
        if (inst.classIsMissing())
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(inst.classValue(), width, prec));
        // predicted
        if (Instance.isMissingValue(predValue))
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(predValue, width, prec));
        // error
        if (Instance.isMissingValue(predValue) || inst.classIsMissing())
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.doubleToString(predValue - inst.classValue(), width, prec));
    } else {
        // actual, rendered as "index:label" with a 1-based class index
        result.append(" "
                + Utils.padLeft(((int) inst.classValue() + 1) + ":" + inst.toString(inst.classIndex()), width));
        // predicted
        if (Instance.isMissingValue(predValue))
            result.append(" " + Utils.padLeft("?", width));
        else
            result.append(" " + Utils.padLeft(
                    ((int) predValue + 1) + ":" + inst.dataset().classAttribute().value((int) predValue), width));
        // error? ("+" marks a misclassification)
        if (!Instance.isMissingValue(predValue) && !inst.classIsMissing()
                && ((int) predValue + 1 != (int) inst.classValue() + 1))
            result.append(" " + " + ");
        else
            result.append(" " + " ");
        // prediction/distribution
        if (printDistribution) {
            if (Instance.isMissingValue(predValue)) {
                result.append(" " + "?");
            } else {
                result.append(" ");
                // Print the full class distribution, starring the predicted class.
                double[] dist = classifier.distributionForInstance(withMissing);
                for (int n = 0; n < dist.length; n++) {
                    if (n > 0)
                        result.append(",");
                    if (n == (int) predValue)
                        result.append("*");
                    result.append(Utils.doubleToString(dist[n], prec));
                }
            }
        } else {
            // Print only the probability assigned to the predicted class.
            if (Instance.isMissingValue(predValue))
                result.append(" " + "?");
            else
                result.append(" " + Utils.doubleToString(
                        classifier.distributionForInstance(withMissing)[(int) predValue], prec));
        }
    }

    // attributes
    result.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
    return result.toString();
}
From source file:de.fub.maps.project.detector.model.inference.processhandler.InferenceDataProcessHandler.java
License:Open Source License
/**
 * Classifies every inference track segment with the model's classifier and
 * groups the labeled instances (and their originating track segments) into
 * the inference model's result set.
 */
@Override
protected void handle() {
    clearResults();
    Classifier classifier = getInferenceModel().getClassifier();
    HashSet<TrackSegment> inferenceDataSet = getInferenceDataSet();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        // Build an empty data set over the model's attributes; the class
        // attribute is at index 0.
        Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList),
                0); //NO18N
        unlabeledInstances.setClassIndex(0);

        // segmentList mirrors the instance order so instance and segment can
        // be re-associated by index after classification.
        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (TrackSegment segment : inferenceDataSet) {
            Instance instance = getInstance(segment);
            unlabeledInstances.add(instance);
            segmentList.add(segment);
        }

        // create copy
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);
                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);
                // get class label for the predicted class index
                String value = unlabeledInstances.classAttribute().value((int) classifyed);
                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }
                // put label and instance to result map
                put(value, instance);
            } catch (Exception ex) {
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // Update the result set of the inference model: map each class label
        // back to the set of track segments whose instances received it.
        for (Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }
            // only those classes are put into the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list lengeth == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}
From source file:de.fub.maps.project.detector.model.inference.processhandler.SpecialInferenceDataProcessHandler.java
License:Open Source License
/**
 * Re-classifies the labeled training segments with the model's classifier and
 * groups the resulting instances (and their originating track segments) into
 * the inference model's result set.
 */
@Override
protected void handle() {
    clearResults();
    Classifier classifier = getInferenceModel().getClassifier();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        // Build an empty data set over the model's attributes; the class
        // attribute is at index 0.
        Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList),
                0); //NO18N
        unlabeledInstances.setClassIndex(0);

        // segmentList mirrors the instance order so instance and segment can
        // be re-associated by index after classification. Each segment is
        // labeled with its training-set key before feature extraction.
        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (Entry<String, HashSet<TrackSegment>> entry : getInferenceModel().getInput().getTrainingsSet()
                .entrySet()) {
            for (TrackSegment segment : entry.getValue()) {
                segment.setLabel(entry.getKey());
                Instance instance = getInstance(segment);
                unlabeledInstances.add(instance);
                segmentList.add(segment);
            }
        }

        // create copy
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);
                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);
                // get class label for the predicted class index
                String value = unlabeledInstances.classAttribute().value((int) classifyed);
                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }
                // put label and instance to result map
                put(value, instance);
            } catch (Exception ex) {
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // Update the result set of the inference model: map each class label
        // back to the set of track segments whose instances received it.
        for (Map.Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }
            // only those classes are put into the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list lengeth == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}
From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java
License:Apache License
/** * * This method applies a serialized WEKA model file to an unlabeld .arff file for classification * * * @param input_arff the annotated gold standard in an .arff file * @param model output file for the model * @param output output file for evaluation of trained classifier (10-fold cross validation) * @throws Exception/* w ww .j a v a2s . c o m*/ */ public static void applyModelToUnlabeledArff(String input_arff, String model, String output) throws Exception { DataSource source = new DataSource(input_arff); Instances unlabeled = source.getDataSet(); if (unlabeled.classIndex() == -1) { unlabeled.setClassIndex(unlabeled.numAttributes() - 1); } Remove rm = new Remove(); rm.setAttributeIndices("1"); // remove ID attribute ObjectInputStream ois = new ObjectInputStream(new FileInputStream(model)); Classifier cls = (Classifier) ois.readObject(); ois.close(); // create copy Instances labeled = new Instances(unlabeled); // label instances for (int i = 0; i < unlabeled.numInstances(); i++) { double clsLabel = cls.classifyInstance(unlabeled.instance(i)); labeled.instance(i).setClassValue(clsLabel); } // save labeled data BufferedWriter writer = new BufferedWriter(new FileWriter(output)); writer.write(labeled.toString()); writer.newLine(); writer.flush(); writer.close(); }
From source file:de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java
License:Apache License
/**
 * In test mode: trains a classifier from the training arff, classifies every
 * confusion-set token in the CAS, and annotates a SpellingAnomaly wherever
 * the prediction differs from the actual token. In training mode: extracts
 * features for each confusion-set token and writes them as training data.
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    Collection<Token> tokens = JCasUtil.select(jcas, Token.class);

    if (isTest) {
        Instances trainData = null;
        Classifier cl = null;
        try {
            trainData = getInstances(trainingArff);
            cl = getClassifier();

            //                SpreadSubsample spread = new SpreadSubsample();
            //                spread.setDistributionSpread(1.0);
            //
            //                FilteredClassifier fc = new FilteredClassifier();
            //                fc.setFilter(spread);
            //                fc.setClassifier(cl);

            cl.buildClassifier(trainData);
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }

        for (Token token : tokens) {
            String tokenString = token.getCoveredText();
            if (tokenString.length() > 0 && confusionSet.contains(tokenString)) {
                // Extract features for this token; the outcome is the token itself.
                Instance<String> instance = new Instance<String>();
                for (SimpleFeatureExtractor featExt : featureExtractors) {
                    instance.addAll(featExt.extract(jcas, token));
                }
                instance.setOutcome(tokenString);

                // Collect the class labels in the order the classifier knows them.
                List<String> classValues = new ArrayList<String>();
                for (Enumeration e = trainData.classAttribute().enumerateValues(); e.hasMoreElements();) {
                    classValues.add(e.nextElement().toString());
                }

                // build classifier from training arff and classify
                try {
                    weka.core.Instance wekaInstance = CleartkInstanceConverter.toWekaInstance(instance,
                            classValues);
                    System.out.println(wekaInstance);
                    double prediction = cl.classifyInstance(wekaInstance);

                    // prediction is the index in the class labels, not the class label itself!
                    String outcome = trainData.classAttribute().value(new Double(prediction).intValue());

                    // Annotate only when the predicted label differs from the token.
                    if (!tokenString.equals(outcome)) {
                        SpellingAnomaly ann = new SpellingAnomaly(jcas, token.getBegin(), token.getEnd());
                        ann.setCategory(errorClass);
                        ann.setSuggestions(SpellingUtils.getSuggestedActionArray(jcas, outcome));
                        ann.addToIndexes();
                    }
                } catch (Exception e) {
                    throw new AnalysisEngineProcessException(e);
                }
            }
        }
    } else {
        for (Token token : tokens) {
            String tokenString = token.getCoveredText();
            if (tokenString.length() > 0 && confusionSet.contains(tokenString)) {
                Instance<String> instance = new Instance<String>();
                for (SimpleFeatureExtractor featExt : featureExtractors) {
                    instance.addAll(featExt.extract(jcas, token));
                }
                instance.setOutcome(tokenString);

                // we also need to add a negative example
                // choose it randomly from the confusion set without the actual token
                // TODO implement negative examples

                this.dataWriter.write(instance);
            }
        }
    }
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
/**
 * Evaluates a classifier as a clustering: each instance's unlabeled feature
 * vector is grouped once by the classifier's predicted class (U) and once by
 * its true class (V), and the two groupings are compared via a cluster
 * contingency table.
 *
 * @param classifier classifier whose predictions form the U clustering
 * @param test       labeled test instances
 * @return contingency table between predicted and true groupings
 */
public static ClusterContingencyTable evaluateClassifier(final Classifier classifier, final Instances test) {
    try {
        final Map<Integer, Set<RealVector>> Umap = new TreeMap<Integer, Set<RealVector>>();
        final Map<Integer, Set<RealVector>> Vmap = new TreeMap<Integer, Set<RealVector>>();

        // Filter that strips the class attribute, leaving only features.
        final Remove rm_filter = new Remove();
        rm_filter.setAttributeIndicesArray(new int[] { test.classIndex() });
        rm_filter.setInputFormat(test);

        for (final Instance i : test) {
            // Push the instance through the filter to get its unlabeled
            // feature vector (input/output must be called in this order).
            rm_filter.input(i);
            final double[] phi = rm_filter.output().toDoubleArray();
            //            final double[] phi = WekaUtil.unlabeledFeatures( i );

            // Group by predicted class.
            final int cluster = (int) classifier.classifyInstance(i);
            Set<RealVector> u = Umap.get(cluster);
            if (u == null) {
                u = new HashSet<RealVector>();
                Umap.put(cluster, u);
            }
            u.add(new ArrayRealVector(phi));

            // Group by true class.
            final int true_label = (int) i.classValue();
            Set<RealVector> v = Vmap.get(true_label);
            if (v == null) {
                v = new HashSet<RealVector>();
                Vmap.put(true_label, v);
            }
            v.add(new ArrayRealVector(phi));
        }

        // Flatten the (key-sorted) maps into cluster lists.
        final ArrayList<Set<RealVector>> U = new ArrayList<Set<RealVector>>();
        for (final Map.Entry<Integer, Set<RealVector>> e : Umap.entrySet()) {
            U.add(e.getValue());
        }
        final ArrayList<Set<RealVector>> V = new ArrayList<Set<RealVector>>();
        for (final Map.Entry<Integer, Set<RealVector>> e : Vmap.entrySet()) {
            V.add(e.getValue());
        }

        return new ClusterContingencyTable(U, V);
    } catch (final RuntimeException ex) {
        throw ex;
    } catch (final Exception ex) {
        // Wrap checked exceptions from WEKA so callers see an unchecked type.
        throw new RuntimeException(ex);
    }
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/**
 * Classifies a feature vector that is not part of an Instances object.
 *
 * @param classifier the classifier to apply
 * @param attributes the attribute schema describing the features
 * @param features   the raw feature values
 * @return the predicted class value
 */
public static double classify(final Classifier classifier, final List<Attribute> attributes,
        final double[] features) {
    // Wrap the raw vector in a one-element data set so WEKA can consume it.
    final Instances singleton = createSingletonInstances(attributes, features);
    final Instance wrapped = singleton.get(0);
    try {
        return classifier.classifyInstance(wrapped);
    } catch (final Exception ex) {
        // Checked exceptions from WEKA are not actionable here; rethrow unchecked.
        throw new RuntimeException(ex);
    }
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Uses the given model to predict the classes of the data. * @param model/*from w ww . ja va 2 s. com*/ * @param data * @return An array of the class predictions. */ public static int[] predictClasses(Classifier model, Instances data) { int[] predictions = new int[data.numInstances()]; int numInstances = data.numInstances(); for (int instIdx = 0; instIdx < numInstances; instIdx++) { try { predictions[instIdx] = (int) model.classifyInstance(data.instance(instIdx)); } catch (Exception e) { predictions[instIdx] = -1; } } return predictions; }