List of usage examples for weka.classifiers.Classifier.buildClassifier
/**
 * Builds the classifier from the supplied instances.
 *
 * @param data the instances the classifier is built from
 * @throws Exception declared by the signature; the concrete failure conditions are defined by
 *         the implementing class
 */
public abstract void buildClassifier(Instances data) throws Exception;
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile6(String input) throws Exception { String file = "Classifier\\featurefile_additive_trial6.arff"; ArffLoader loader = new ArffLoader(); //ATTRIBUTES/*from w w w . j a va 2s. c o m*/ Attribute attr[] = new Attribute[50]; attr[0] = new Attribute("Autosentiment"); attr[1] = new Attribute("PositiveMatch"); attr[2] = new Attribute("NegativeMatch"); attr[3] = new Attribute("FW"); attr[4] = new Attribute("JJ"); attr[5] = new Attribute("RB"); attr[6] = new Attribute("RB_JJ"); attr[7] = new Attribute("amod"); attr[8] = new Attribute("acomp"); attr[9] = new Attribute("advmod"); attr[10] = new Attribute("BLPos"); attr[11] = new Attribute("BLNeg"); attr[12] = new Attribute("VSPositive"); attr[13] = new Attribute("VSNegative"); //class FastVector classValue = new FastVector(3); classValue.addElement("p"); classValue.addElement("n"); classValue.addElement("o"); attr[14] = new Attribute("answer", classValue); FastVector attrs = new FastVector(); attrs.addElement(attr[0]); attrs.addElement(attr[1]); attrs.addElement(attr[2]); attrs.addElement(attr[3]); attrs.addElement(attr[4]); attrs.addElement(attr[5]); attrs.addElement(attr[6]); attrs.addElement(attr[7]); attrs.addElement(attr[8]); attrs.addElement(attr[9]); attrs.addElement(attr[10]); attrs.addElement(attr[11]); attrs.addElement(attr[12]); attrs.addElement(attr[13]); attrs.addElement(attr[14]); // Add Instances Instances dataset = new Instances("my_dataset", attrs, 0); if (new File(file).isFile()) { loader.setFile(new File(file)); dataset = loader.getDataSet(); } System.out.println("-----------------------------------------"); System.out.println(input); System.out.println("-----------------------------------------"); StringTokenizer tokenizer = new StringTokenizer(input); while (tokenizer.hasMoreTokens()) { Instance example = new Instance(15); for (int j = 0; j < 15; j++) { String st = tokenizer.nextToken(); System.out.println(j + " " + st); if (j == 0) example.setValue(attr[j], 
Float.parseFloat(st)); else if (j == 14) example.setValue(attr[j], st); else example.setValue(attr[j], Integer.parseInt(st)); } dataset.add(example); } //Save dataset ArffSaver saver = new ArffSaver(); saver.setInstances(dataset); saver.setFile(new File(file)); saver.writeBatch(); //Read dataset loader.setFile(new File(file)); dataset = loader.getDataSet(); //Build classifier dataset.setClassIndex(14); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //Save classifier String file1 = "Classifier\\classifier_add_asAndpolarwordsAndposAnddepAndblAndvs.model"; OutputStream os = new FileOutputStream(file1); ObjectOutputStream objectOutputStream = new ObjectOutputStream(os); objectOutputStream.writeObject(classifier); // Comment out if not needed //Read classifier back InputStream is = new FileInputStream(file1); ObjectInputStream objectInputStream = new ObjectInputStream(is); classifier = (Classifier) objectInputStream.readObject(); objectInputStream.close(); //Evaluate resample if needed //dataset = dataset.resample(new Random(42)); //split to 70:30 learn and test set double percent = 70.0; int trainSize = (int) Math.round(dataset.numInstances() * percent / 100); int testSize = dataset.numInstances() - trainSize; Instances train = new Instances(dataset, 0, trainSize); Instances test = new Instances(dataset, trainSize, testSize); train.setClassIndex(14); test.setClassIndex(14); //Evaluate Evaluation eval = new Evaluation(dataset); //trainset eval.crossValidateModel(classifier, dataset, 10, new Random(1)); System.out.println("EVALUATION:\n" + eval.toSummaryString()); System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure()); System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision()); System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall()); }
From source file:de.fub.maps.project.detector.model.inference.processhandler.TrainingsDataProcessHandler.java
License:Open Source License
/**
 * Builds the inference model's classifier from {@code trainingSet}, evaluates it against
 * {@code testingSet} and hands the resulting evaluation to
 * {@code updateVisualRepresentation}.
 *
 * @param trainingSet instances the classifier is built from
 * @param testingSet instances the built classifier is evaluated on
 * @throws InferenceModelClassifyException wrapping any exception raised while building or
 *         evaluating
 */
private void evaluate(Instances trainingSet, Instances testingSet) {
    final Classifier model = getInferenceModel().getClassifier();
    try {
        model.buildClassifier(trainingSet);

        final Evaluation result = new Evaluation(testingSet);
        result.evaluateModel(model, testingSet);

        updateVisualRepresentation(result);
    } catch (Exception failure) {
        // Keep the original exception as cause so the stack trace is not lost.
        throw new InferenceModelClassifyException(failure.getMessage(), failure);
    }
}
From source file:de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java
License:Apache License
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { Collection<Token> tokens = JCasUtil.select(jcas, Token.class); if (isTest) { Instances trainData = null;/*from ww w.j av a 2 s . c o m*/ Classifier cl = null; try { trainData = getInstances(trainingArff); cl = getClassifier(); // SpreadSubsample spread = new SpreadSubsample(); // spread.setDistributionSpread(1.0); // // FilteredClassifier fc = new FilteredClassifier(); // fc.setFilter(spread); // fc.setClassifier(cl); cl.buildClassifier(trainData); } catch (Exception e) { throw new AnalysisEngineProcessException(e); } for (Token token : tokens) { String tokenString = token.getCoveredText(); if (tokenString.length() > 0 && confusionSet.contains(tokenString)) { Instance<String> instance = new Instance<String>(); for (SimpleFeatureExtractor featExt : featureExtractors) { instance.addAll(featExt.extract(jcas, token)); } instance.setOutcome(tokenString); List<String> classValues = new ArrayList<String>(); for (Enumeration e = trainData.classAttribute().enumerateValues(); e.hasMoreElements();) { classValues.add(e.nextElement().toString()); } // build classifier from training arff and classify try { weka.core.Instance wekaInstance = CleartkInstanceConverter.toWekaInstance(instance, classValues); System.out.println(wekaInstance); double prediction = cl.classifyInstance(wekaInstance); // prediction is the index in the class labels, not the class label itself! 
String outcome = trainData.classAttribute().value(new Double(prediction).intValue()); if (!tokenString.equals(outcome)) { SpellingAnomaly ann = new SpellingAnomaly(jcas, token.getBegin(), token.getEnd()); ann.setCategory(errorClass); ann.setSuggestions(SpellingUtils.getSuggestedActionArray(jcas, outcome)); ann.addToIndexes(); } } catch (Exception e) { throw new AnalysisEngineProcessException(e); } } } } else { for (Token token : tokens) { String tokenString = token.getCoveredText(); if (tokenString.length() > 0 && confusionSet.contains(tokenString)) { Instance<String> instance = new Instance<String>(); for (SimpleFeatureExtractor featExt : featureExtractors) { instance.addAll(featExt.extract(jcas, token)); } instance.setOutcome(tokenString); // we also need to add a negative example // choose it randomly from the confusion set without the actual token // TODO implement negative examples this.dataWriter.write(instance); } } } }
From source file:de.ugoe.cs.cpdp.dataselection.DecisionTreeSelection.java
License:Apache License
/**
 * Reduces {@code traindataSet} to the single training product that a regression tree predicts
 * to score best for {@code testdata}.
 *
 * <p>For every ordered pair of distinct training products a J48 tree is built on the first and
 * evaluated on the second; the resulting F-measure (class index 1) is stored together with a
 * per-attribute same/more/less comparison of the two products' characteristics. A REPTree is
 * then built on these rows and used to predict a score for each training product compared
 * against the test data. Row 0 of the characteristic instances is used for the test data and
 * row {@code i + 1} for training product {@code i}.
 *
 * @param testdata the test data whose characteristics are compared against the training data
 * @param traindataSet candidate training products; on return it contains only the selected one
 * @throws RuntimeException wrapping any failure during building, evaluation or selection
 */
@Override
public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
    final Instances data = characteristicInstances(testdata, traindataSet);

    final ArrayList<String> attVals = new ArrayList<String>();
    attVals.add("same");
    attVals.add("more");
    attVals.add("less");
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (int j = 0; j < data.numAttributes(); j++) {
        atts.add(new Attribute(data.attribute(j).name(), attVals));
    }
    atts.add(new Attribute("score"));
    Instances similarityData = new Instances("similarity", atts, 0);
    similarityData.setClassIndex(similarityData.numAttributes() - 1);

    try {
        Classifier classifier = new J48();
        for (int i = 0; i < traindataSet.size(); i++) {
            classifier.buildClassifier(traindataSet.get(i));
            for (int j = 0; j < traindataSet.size(); j++) {
                if (i != j) {
                    double[] similarity = compareCharacteristics(data, i + 1, j + 1);

                    Evaluation eval = new Evaluation(traindataSet.get(j));
                    eval.evaluateModel(classifier, traindataSet.get(j));
                    similarity[data.numAttributes()] = eval.fMeasure(1);
                    similarityData.add(new DenseInstance(1.0, similarity));
                }
            }
        }

        REPTree repTree = new REPTree();
        if (repTree.getNumFolds() > similarityData.size()) {
            repTree.setNumFolds(similarityData.size());
        }
        // NOTE(review): this unconditional call overrides the adjustment above, so the number of
        // folds is always 2. Kept as-is to preserve behavior; confirm which one is intended.
        repTree.setNumFolds(2);
        repTree.buildClassifier(similarityData);

        Instances testTrainSimilarity = new Instances(similarityData);
        testTrainSimilarity.clear();
        for (int i = 0; i < traindataSet.size(); i++) {
            // The score slot stays 0.0; it is the value the tree is asked to predict.
            testTrainSimilarity.add(new DenseInstance(1.0, compareCharacteristics(data, 0, i + 1)));
        }

        int bestScoringProductIndex = -1;
        // Double.MIN_VALUE is the smallest POSITIVE double, so it would reject every score <= 0
        // and leave the index at -1. Negative infinity is the correct lower bound.
        double maxScore = Double.NEGATIVE_INFINITY;
        for (int i = 0; i < traindataSet.size(); i++) {
            double score = repTree.classifyInstance(testTrainSimilarity.get(i));
            if (score > maxScore) {
                maxScore = score;
                bestScoringProductIndex = i;
            }
        }
        if (bestScoringProductIndex < 0) {
            // Only reachable for an empty training set or when every predicted score is NaN.
            throw new IllegalStateException("no training product could be scored");
        }
        Instances bestScoringProduct = traindataSet.get(bestScoringProductIndex);
        traindataSet.clear();
        traindataSet.add(bestScoringProduct);
    } catch (Exception e) {
        Console.printerr("failure during DecisionTreeSelection: " + e.getMessage());
        throw new RuntimeException(e);
    }
}

/**
 * Compares two rows of {@code data} attribute by attribute with a 10% tolerance: 2.0 ("less")
 * when the other row's value is below 90% of the reference value, 1.0 ("more") when it is above
 * 110%, 0.0 ("same") otherwise. The returned array has one extra trailing slot for the score,
 * left at 0.0.
 */
private static double[] compareCharacteristics(Instances data, int referenceRow, int otherRow) {
    final int numAttributes = data.numAttributes();
    final double[] similarity = new double[numAttributes + 1];
    for (int k = 0; k < numAttributes; k++) {
        final double reference = data.get(referenceRow).value(k);
        final double other = data.get(otherRow).value(k);
        if (0.9 * reference > other) {
            similarity[k] = 2.0;
        } else if (1.1 * reference < other) {
            similarity[k] = 1.0;
        } else {
            similarity[k] = 0.0;
        }
    }
    return similarity;
}
From source file:de.unidue.langtech.grading.tc.LearningCurveTask.java
License:Open Source License
@Override public void execute(TaskContext aContext) throws Exception { boolean multiLabel = false; for (Integer numberInstances : NUMBER_OF_TRAINING_INSTANCES) { for (int iteration = 0; iteration < ITERATIONS; iteration++) { File arffFileTrain = new File( aContext.getStorageLocation(TEST_TASK_INPUT_KEY_TRAINING_DATA, AccessMode.READONLY) .getPath() + "/" + TRAINING_DATA_FILENAME); File arffFileTest = new File( aContext.getStorageLocation(TEST_TASK_INPUT_KEY_TEST_DATA, AccessMode.READONLY).getPath() + "/" + TRAINING_DATA_FILENAME); Instances trainData = TaskUtils.getInstances(arffFileTrain, multiLabel); Instances testData = TaskUtils.getInstances(arffFileTest, multiLabel); if (numberInstances > trainData.size()) { continue; }//w w w. java2s . c o m Classifier cl = AbstractClassifier.forName(classificationArguments.get(0), classificationArguments.subList(1, classificationArguments.size()).toArray(new String[0])); Instances copyTestData = new Instances(testData); trainData = WekaUtils.removeOutcomeId(trainData, multiLabel); testData = WekaUtils.removeOutcomeId(testData, multiLabel); Random generator = new Random(); generator.setSeed(System.nanoTime()); trainData.randomize(generator); // remove fraction of training data that should not be used for training for (int i = trainData.size() - 1; i >= numberInstances; i--) { trainData.delete(i); } // file to hold prediction results File evalOutput = new File( aContext.getStorageLocation(TEST_TASK_OUTPUT_KEY, AccessMode.READWRITE).getPath() + "/" + EVALUATION_DATA_FILENAME + "_" + numberInstances + "_" + iteration); // train the classifier on the train set split - not necessary in multilabel setup, but // in single label setup cl.buildClassifier(trainData); weka.core.SerializationHelper.write(evalOutput.getAbsolutePath(), WekaUtils.getEvaluationSinglelabel(cl, trainData, testData)); testData = WekaUtils.getPredictionInstancesSingleLabel(testData, cl); testData = WekaUtils.addOutcomeId(testData, copyTestData, false); // // 
Write out the predictions // DataSink.write(aContext.getStorageLocation(TEST_TASK_OUTPUT_KEY, AccessMode.READWRITE) // .getAbsolutePath() + "/" + PREDICTIONS_FILENAME + "_" + trainPercent, testData); } } }
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/** * n fold cross validation without noise * /*from w ww.j a v a 2 s . c o m*/ * @param classifier * @param dataset * @param folds * @return */ public Stats crossValidate(Classifier classifier, Instances dataset, int folds) { // randomizes order of instances Instances randDataset = new Instances(dataset); randDataset.randomize(RandomizationManager.randomGenerator); // cross-validation Evaluation eval = null; try { eval = new Evaluation(randDataset); } catch (Exception e) { e.printStackTrace(); } for (int n = 0; n < folds; n++) { Instances test = randDataset.testCV(folds, n); Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator); // build and evaluate classifier Classifier clsCopy; try { clsCopy = Classifier.makeCopy(classifier); clsCopy.buildClassifier(train); eval.evaluateModel(clsCopy, test); } catch (Exception e) { e.printStackTrace(); } } // output evaluation for the nfold cross validation Double precision = eval.precision(Settings.classificationChoice); Double recall = eval.recall(Settings.classificationChoice); Double fmeasure = eval.fMeasure(Settings.classificationChoice); Double classificationTP = eval.numTruePositives(Settings.classificationChoice); Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice); Double classificationFP = eval.numFalsePositives(Settings.classificationChoice); Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice); Double kappa = eval.kappa(); return new Stats(classificationTP, classificationTN, classificationFP, classificationFN, kappa, precision, recall, fmeasure); }
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/** * n fold cross validation with noise (independent fp and fn) * /*w w w .jav a2 s . c o m*/ * @param classifier * @param dataset * @param folds * @return */ public Stats crossValidateWithNoise(Classifier classifier, Instances dataset, int folds, BigDecimal fpPercentage, BigDecimal fnPercentage) { // noise manager NoiseInjectionManager noiseInjectionManager = new NoiseInjectionManager(); // randomizes order of instances Instances randDataset = new Instances(dataset); randDataset.randomize(RandomizationManager.randomGenerator); // cross-validation Evaluation eval = null; try { eval = new Evaluation(randDataset); } catch (Exception e) { e.printStackTrace(); } for (int n = 0; n < folds; n++) { Instances test = randDataset.testCV(folds, n); Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator); // copies instances of train set to not modify the original Instances noisyTrain = new Instances(train); // injects level of noise in the copied train set noiseInjectionManager.addNoiseToDataset(noisyTrain, fpPercentage, fnPercentage); // build and evaluate classifier Classifier clsCopy; try { clsCopy = Classifier.makeCopy(classifier); // trains the model using a noisy train set clsCopy.buildClassifier(noisyTrain); eval.evaluateModel(clsCopy, test); } catch (Exception e) { e.printStackTrace(); } } // output evaluation for the nfold cross validation Double precision = eval.precision(Settings.classificationChoice); Double recall = eval.recall(Settings.classificationChoice); Double fmeasure = eval.fMeasure(Settings.classificationChoice); Double classificationTP = eval.numTruePositives(Settings.classificationChoice); Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice); Double classificationFP = eval.numFalsePositives(Settings.classificationChoice); Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice); Double kappa = eval.kappa(); return new Stats(classificationTP, classificationTN, 
classificationFP, classificationFN, kappa, precision, recall, fmeasure); }
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/** * n fold cross validation with noise (combined fp and fn) * /* w w w. ja v a 2 s . c o m*/ * @param classifier * @param dataset * @param folds * @return */ public Stats crossValidateWithNoise(Classifier classifier, Instances dataset, int folds, BigDecimal combinedFpFnPercentage) { // noise manager NoiseInjectionManager noiseInjectionManager = new NoiseInjectionManager(); // randomizes order of instances Instances randDataset = new Instances(dataset); randDataset.randomize(RandomizationManager.randomGenerator); // cross-validation Evaluation eval = null; try { eval = new Evaluation(randDataset); } catch (Exception e) { e.printStackTrace(); } for (int n = 0; n < folds; n++) { Instances test = randDataset.testCV(folds, n); Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator); // copies instances of train set to not modify the original Instances noisyTrain = new Instances(train); // injects level of noise in the copied train set noiseInjectionManager.addNoiseToDataset(noisyTrain, combinedFpFnPercentage); // build and evaluate classifier Classifier clsCopy; try { clsCopy = Classifier.makeCopy(classifier); // trains the model using a noisy train set clsCopy.buildClassifier(noisyTrain); eval.evaluateModel(clsCopy, test); } catch (Exception e) { e.printStackTrace(); } } // output evaluation for the nfold cross validation Double precision = eval.precision(Settings.classificationChoice); Double recall = eval.recall(Settings.classificationChoice); Double fmeasure = eval.fMeasure(Settings.classificationChoice); Double classificationTP = eval.numTruePositives(Settings.classificationChoice); Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice); Double classificationFP = eval.numFalsePositives(Settings.classificationChoice); Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice); Double kappa = eval.kappa(); return new Stats(classificationTP, classificationTN, classificationFP, 
classificationFN, kappa, precision, recall, fmeasure); }
From source file:es.bsc.autonomic.powermodeller.tools.classifiers.BaggingClassifier.java
License:Apache License
@Override protected Classifier buildClassifier(DataSet training_ds) { logger.debug("Building Bagging classifier."); Classifier model = null; // Get the independent variable index String independent = training_ds.getIndependent(); if (independent == null) throw new WekaWrapperException("Independent variable is not set in dataset."); try {//from w w w .ja v a 2s . c o m // Read all the instances in the file (ARFF, CSV, XRFF, ...) ConverterUtils.DataSource source = new ConverterUtils.DataSource(training_ds.getFilePath()); Instances instances = source.getDataSet(); // Set the independent variable (powerWatts). instances.setClassIndex(instances.attribute(independent).index()); // Builds a regression model for the given data. model = new weka.classifiers.meta.Bagging(); // Build Linear Regression model.buildClassifier(instances); } catch (WekaWrapperException e) { logger.error("Error while creating Bagging classifier.", e); throw new WekaWrapperException("Error while creating Bagging classifier."); } catch (Exception e) { logger.error("Error while applying Bagging to data set instances.", e); throw new WekaWrapperException("Error while applying Bagging to data set instances."); } return model; }
From source file:es.bsc.autonomic.powermodeller.tools.classifiers.LinearRegressionClassifier.java
License:Apache License
@Override public Classifier buildClassifier(DataSet training_ds) { logger.debug("Building LinearRegression classifier."); Classifier model; // Get the independent variable index String independent = training_ds.getIndependent(); if (independent == null) throw new WekaWrapperException("Independent variable is not set in dataset."); try {/*from w w w .j a va2 s . c o m*/ // Read all the instances in the file (ARFF, CSV, XRFF, ...) ConverterUtils.DataSource source = new ConverterUtils.DataSource(training_ds.getFilePath()); Instances instances = source.getDataSet(); // Set the independent variable (powerWatts). instances.setClassIndex(instances.attribute(independent).index()); // Builds a regression model for the given data. model = new weka.classifiers.functions.LinearRegression(); // Build Linear Regression model.buildClassifier(instances); } catch (WekaWrapperException e) { logger.error("Error while creating Linear Regression classifier.", e); throw new WekaWrapperException("Error while creating Linear Regression classifier."); } catch (Exception e) { logger.error("Error while applying Linear Regression to data set instances.", e); throw new WekaWrapperException("Error while applying Linear Regression to data set instances."); } return model; }