List of usage examples for weka.classifiers.Evaluation.crossValidateModel
public void crossValidateModel(Classifier classifier, Instances data, int numFolds, Random random) throws Exception
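Before the project examples, here is a minimal, self-contained sketch of a typical call. The data file name ("data.arff") and the assumption that the class is the last attribute are placeholders, not taken from the examples below. Note that crossValidateModel trains and evaluates internal copies of the classifier, so the instance you pass in is left untrained.

import java.util.Random;

import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CrossValidateSketch {
    public static void main(String[] args) throws Exception {
        // load the data; the path is a placeholder
        Instances data = DataSource.read("data.arff");
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1); // assume the class is the last attribute
        }

        // 10-fold cross-validation with a fixed seed for reproducibility
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new J48(), data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
    }
}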
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile2(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial2.arff";
    ArffLoader loader = new ArffLoader();

    // Attributes: numeric features
    Attribute attr[] = new Attribute[50];
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");

    // nominal class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[3] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);

    // Create the dataset, appending to the existing feature file if present
    Instances dataset = new Instances("my_dataset", attrs, 0);
    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    // Each group of 4 whitespace-separated tokens becomes one instance
    StringTokenizer tokenizer = new StringTokenizer(input);
    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(4);
        for (int j = 0; j < 4; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 3)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    // Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    // Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    // Build classifier
    dataset.setClassIndex(3);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    // Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwords.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close(); // close so the model is flushed to disk before it is read back

    // Read classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    // Resample if needed
    //dataset = dataset.resample(new Random(42));

    // Split into 70:30 train and test sets (not used by the cross-validation below)
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(3);
    test.setClassIndex(3);

    // Evaluate with 10-fold cross-validation over the full dataset
    Evaluation eval = new Evaluation(dataset);
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile3(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial3.arff";
    ArffLoader loader = new ArffLoader();

    // Attributes: numeric features
    Attribute attr[] = new Attribute[50];
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");

    // nominal class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[7] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);

    // Create the dataset, appending to the existing feature file if present
    Instances dataset = new Instances("my_dataset", attrs, 0);
    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    // Each group of 8 whitespace-separated tokens becomes one instance
    StringTokenizer tokenizer = new StringTokenizer(input);
    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(8);
        for (int j = 0; j < 8; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 7)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    // Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    // Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    // Build classifier
    dataset.setClassIndex(7);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    // Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwordsAndpos.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close(); // close so the model is flushed to disk before it is read back

    // Read classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    // Resample if needed
    //dataset = dataset.resample(new Random(42));

    // Split into 70:30 train and test sets (not used by the cross-validation below)
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(7);
    test.setClassIndex(7);

    // Evaluate with 10-fold cross-validation over the full dataset
    Evaluation eval = new Evaluation(dataset);
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile4(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial4.arff";
    ArffLoader loader = new ArffLoader();

    // Attributes: numeric features
    Attribute attr[] = new Attribute[50];
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");

    // nominal class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[10] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);
    attrs.addElement(attr[8]);
    attrs.addElement(attr[9]);
    attrs.addElement(attr[10]);

    // Create the dataset, appending to the existing feature file if present
    Instances dataset = new Instances("my_dataset", attrs, 0);
    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    // Each group of 11 whitespace-separated tokens becomes one instance
    StringTokenizer tokenizer = new StringTokenizer(input);
    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(11);
        for (int j = 0; j < 11; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 10)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    // Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    // Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    // Build classifier
    dataset.setClassIndex(10);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    // Save classifier
    String file1 = "Classifier\\classifier_asAndpolarwordsAndposAnddep.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close(); // close so the model is flushed to disk before it is read back

    // Read classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    // Resample if needed
    //dataset = dataset.resample(new Random(42));

    // Split into 70:30 train and test sets (not used by the cross-validation below)
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(10);
    test.setClassIndex(10);

    // Evaluate with 10-fold cross-validation over the full dataset
    Evaluation eval = new Evaluation(dataset);
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile5(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial5.arff";
    ArffLoader loader = new ArffLoader();

    // Attributes: numeric features
    Attribute attr[] = new Attribute[50];
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");
    attr[10] = new Attribute("BLPos");
    attr[11] = new Attribute("BLNeg");

    // nominal class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[12] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);
    attrs.addElement(attr[8]);
    attrs.addElement(attr[9]);
    attrs.addElement(attr[10]);
    attrs.addElement(attr[11]);
    attrs.addElement(attr[12]);

    // Create the dataset, appending to the existing feature file if present
    Instances dataset = new Instances("my_dataset", attrs, 0);
    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    // Each group of 13 whitespace-separated tokens becomes one instance
    StringTokenizer tokenizer = new StringTokenizer(input);
    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(13);
        for (int j = 0; j < 13; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 12)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    // Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    // Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    // Build classifier
    dataset.setClassIndex(12);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    // Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwordsAndposAnddepAndbl.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close(); // close so the model is flushed to disk before it is read back

    // Read classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    // Resample if needed
    //dataset = dataset.resample(new Random(42));

    // Split into 70:30 train and test sets (not used by the cross-validation below)
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(12);
    test.setClassIndex(12);

    // Evaluate with 10-fold cross-validation over the full dataset
    Evaluation eval = new Evaluation(dataset);
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile6(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial6.arff";
    ArffLoader loader = new ArffLoader();

    // Attributes: numeric features
    Attribute attr[] = new Attribute[50];
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");
    attr[10] = new Attribute("BLPos");
    attr[11] = new Attribute("BLNeg");
    attr[12] = new Attribute("VSPositive");
    attr[13] = new Attribute("VSNegative");

    // nominal class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[14] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);
    attrs.addElement(attr[8]);
    attrs.addElement(attr[9]);
    attrs.addElement(attr[10]);
    attrs.addElement(attr[11]);
    attrs.addElement(attr[12]);
    attrs.addElement(attr[13]);
    attrs.addElement(attr[14]);

    // Create the dataset, appending to the existing feature file if present
    Instances dataset = new Instances("my_dataset", attrs, 0);
    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    // Each group of 15 whitespace-separated tokens becomes one instance
    StringTokenizer tokenizer = new StringTokenizer(input);
    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(15);
        for (int j = 0; j < 15; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 14)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    // Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    // Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    // Build classifier
    dataset.setClassIndex(14);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    // Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwordsAndposAnddepAndblAndvs.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close(); // close so the model is flushed to disk before it is read back

    // Read classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    // Resample if needed
    //dataset = dataset.resample(new Random(42));

    // Split into 70:30 train and test sets (not used by the cross-validation below)
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(14);
    test.setClassIndex(14);

    // Evaluate with 10-fold cross-validation over the full dataset
    Evaluation eval = new Evaluation(dataset);
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}
From source file:cyber009.main.UDALNeuralNetwork.java
public static void main(String[] args) {
    UDALNeuralNetwork udal = new UDALNeuralNetwork(0.014013);
    Statistics statis = new Statistics(udal.v);
    long timeStart = 0, timeEnd = 0;
    for (int f = 2; f <= 2; f++) {
        udal.initUDAL(4, 5000);
        udal.activeLearning(0, 5000);
        udal.arraytoInstances();
        udal.ann.weightReset();
        timeStart = System.currentTimeMillis();
        MultilayerPerceptron wekaNN = new MultilayerPerceptron();
        wekaNN.setAutoBuild(true);
        //wekaNN.setGUI(true);
        try {
            wekaNN.buildClassifier(udal.dataSet);
            Evaluation eval = new Evaluation(udal.dataSet);
            System.out.println(wekaNN.toString());
            // 4999 folds over the 5000-instance dataset approximates leave-one-out
            // cross-validation; seeding with the current time makes the folds non-reproducible
            eval.crossValidateModel(wekaNN, udal.dataSet, 4999, new Random(System.currentTimeMillis()));
            System.out.println(wekaNN.toString());
            System.out.println(eval.toClassDetailsString());
            // udal.ann.gradientDescent(10000L, 3, 100);
            // for (Double target : udal.v.CLASSES) {
            //     statis.calMVMuSigma(target);
            //     System.out.println(udal.v.N_DATA_IN_CLASS.get(target));
            //     System.out.println(statis.mu.get(target));
            //     System.out.println(statis.sigma.get(target));
            // }
            // for (int d = 0; d < udal.v.D; d++) {
            //     if (udal.v.LABEL[d] == false) {
            //         double[][] val = new double[udal.v.N - 1][1];
            //         for (int n = 1; n < udal.v.N; n++) {
            //             val[n - 1][0] = udal.v.X[d][n];
            //             //System.out.print(udal.v.X[d][n] + " ");
            //             //System.out.println(val[n - 1][0]);
            //         }
            //         Matrix mVal = new Matrix(val);
            //         double pp = 0.0D;
            //         for (Double target : udal.v.CLASSES) {
            //             //System.out.println("-----------------------\nClass:" + target);
            //             pp += statis.posteriorDistribution(target, mVal);
            //             System.out.println("conditional: Entropy: "
            //                     + statis.conditionalEntropy(target, mVal, d));
            //         }
            //         System.out.print("Sum posterior:" + pp + " for " + new Matrix(val).transpose());
            //     }
            // }
            // System.out.println("-----------------------");
            // timeEnd = System.currentTimeMillis();
            // System.out.println("feature #:" + udal.v.N + " time:(" + (timeEnd - timeStart) + ")");
            // udal.v.showResult();
        } catch (Exception ex) {
            Logger.getLogger(UDALNeuralNetwork.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
From source file:de.fub.maps.project.detector.model.inference.processhandler.CrossValidationProcessHandler.java
License:Open Source License
private void evaluate(Instances trainingSet) {
    try {
        Evaluation evaluation = new Evaluation(trainingSet);
        // never use more folds than there are instances
        int crossValidationFoldsCount = getCrossValidationFoldsCount();
        crossValidationFoldsCount = crossValidationFoldsCount > trainingSet.size() ? trainingSet.size()
                : crossValidationFoldsCount;
        evaluation.crossValidateModel(getInferenceModel().getClassifier(), trainingSet,
                crossValidationFoldsCount, new Random(1));
        updateVisualRepresentation(evaluation);
    } catch (Exception ex) {
        throw new InferenceModelClassifyException(ex.getMessage(), ex);
    }
}
From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java
License:Apache License
/**
 * This method creates a serialized WEKA model file from an .arff file containing the annotated gold standard.
 *
 * @param gs_arff the annotated gold standard in an .arff file
 * @param model output file for the model
 * @param output_eval if true, the evaluation of the trained classifier is printed (10-fold cross-validation)
 * @throws Exception
 */
public static void createModelFromGoldstandard(String gs_arff, String model, boolean output_eval)
        throws Exception {
    DataSource source = new DataSource(gs_arff);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove ID attribute

    // Standard classifier; BNs proved most robust, but of course other classifiers are possible
    BayesNet bn = new BayesNet();

    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(bn);

    // build classifier
    fc.buildClassifier(data);
    SerializationHelper.write(model, fc);

    if (output_eval) {
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(fc, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
        System.out.println(eval.toClassDetailsString());
    }
}
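A hypothetical call site for the helper above (both file paths are placeholders, not taken from the project):

WekaMachineLearning.createModelFromGoldstandard("goldstandard.arff", "alignment.model", true);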
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider.java
License:Apache License
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> attributes = new ArrayList<>();
    List<String> labelNames = ClassifierProvider.labelNames(Y);
    labelNames.stream().map(attr -> new Attribute(attr, Arrays.asList("y", "n")))
            .forEachOrdered(attributes::add);
    List<String> featureNames = ClassifierProvider.featureNames(X);
    featureNames.stream().map(Attribute::new).forEachOrdered(attributes::add);
    String name = Files.getNameWithoutExtension(modelFile.getName());
    datasetSchema = new Instances(name, attributes, 0);
    datasetSchema.setClassIndex(labelNames.size());
    // add instances
    // due to the limitation of the interface definition, X, Y should be reorganized
    SetMultimap<Map<String, Double>, String> XY = HashMultimap.create();
    IntStream.range(0, X.size()).forEach(i -> XY.put(X.get(i), Y.get(i)));
    Instances trainingInstances = new Instances(datasetSchema, XY.size());
    for (Map.Entry<Map<String, Double>, Collection<String>> entry : XY.asMap().entrySet()) {
        Set<String> y = ImmutableSet.copyOf(entry.getValue());
        Map<String, Double> x = entry.getKey();
        SparseInstance instance = new SparseInstance(labelNames.size() + x.size());
        for (String labelName : labelNames) {
            instance.setValue(datasetSchema.attribute(labelName), y.contains(labelName) ? "y" : "n");
        }
        for (Map.Entry<String, Double> e : x.entrySet()) {
            instance.setValue(datasetSchema.attribute(e.getKey()), e.getValue());
        }
        trainingInstances.add(instance);
    }
    // training
    try {
        classifier = (MultiLabelClassifier) AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    if (crossValidation) {
        try {
            Evaluation eval = new Evaluation(trainingInstances);
            Random rand = new Random();
            eval.crossValidateModel(classifier, trainingInstances, 10, rand);
            LOG.debug(eval.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.WekaProvider.java
License:Apache License
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> attributes = new ArrayList<>();
    ClassifierProvider.featureNames(X).stream().map(Attribute::new).forEachOrdered(attributes::add);
    Attribute label = new Attribute("__label__", ClassifierProvider.labelNames(Y));
    attributes.add(label);
    String name = Files.getNameWithoutExtension(modelFile.getName());
    datasetSchema = new Instances(name, attributes, X.size());
    datasetSchema.setClass(label);
    // add instances
    Instances trainingInstances = new Instances(datasetSchema, X.size());
    if (balanceWeight) {
        Multiset<String> labelCounts = HashMultiset.create(Y);
        double maxCount = labelCounts.entrySet().stream().mapToInt(Multiset.Entry::getCount).max()
                .orElseThrow(AnalysisEngineProcessException::new);
        for (int i = 0; i < X.size(); i++) {
            String y = Y.get(i);
            double weight = maxCount / labelCounts.count(y);
            trainingInstances.add(newInstance(X.get(i), y, weight, trainingInstances));
        }
    } else {
        for (int i = 0; i < X.size(); i++) {
            trainingInstances.add(newInstance(X.get(i), Y.get(i), 1.0, trainingInstances));
        }
    }
    // training
    try {
        classifier = AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // write model and dataset schema
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // backup training dataset as arff file
    if (datasetExportFile != null) {
        try {
            ArffSaver saver = new ArffSaver();
            saver.setInstances(trainingInstances);
            saver.setFile(datasetExportFile);
            saver.writeBatch();
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
    if (crossValidation) {
        try {
            Evaluation eval = new Evaluation(trainingInstances);
            Random rand = new Random();
            eval.crossValidateModel(classifier, trainingInstances, 10, rand);
            LOG.debug(eval.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}