List of usage examples for weka.core.Instances.testCV
public Instances testCV(int numFolds, int numFold)
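For orientation, here is a minimal sketch of the usual trainCV/testCV pairing before the longer real-world examples below. The file name data.arff and the seed are placeholders, not taken from the examples that follow; any ARFF dataset whose class is the last attribute works.

import java.util.Random;

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class TestCVDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path
        Instances data = new DataSource("data.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        int folds = 10;
        Instances randData = new Instances(data);   // work on a copy
        randData.randomize(new Random(42));         // shuffle before folding
        if (randData.classAttribute().isNominal()) {
            randData.stratify(folds);               // keep class proportions per fold
        }
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n); // all folds except fold n
            Instances test = randData.testCV(folds, n);   // fold n only
            System.out.println("Fold " + n + ": train=" + train.numInstances()
                    + ", test=" + test.numInstances());
        }
    }
}

trainCV(folds, n) returns the complement of fold n and testCV(folds, n) returns fold n itself, so across all iterations the test folds cover the dataset exactly once.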
From source file:machinelearningcw.EnhancedLinearPerceptron.java
public boolean crossValidation(Instances ins) throws Exception {
    // copy the data
    Instances data = new Instances(ins);
    Instances train; // the new training data
    Instances test;  // the new testing data
    int seed = 0;
    Random rand = new Random(seed);
    // randomize the data
    data.randomize(rand);
    // number of folds
    int folds = 10;
    int offlineErrors = 0;
    int onlineErrors = 0;
    for (int i = 0; i < folds; i++) {
        train = data.trainCV(folds, i);
        test = data.testCV(folds, i);

        // train the offline (batch) perceptron on this fold, then count its test errors
        offlinePerceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                offlineErrors += 1;
            }
        }

        // reset w, train the online perceptron, then count its test errors
        Arrays.fill(w, 1);
        perceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                onlineErrors += 1;
            }
        }
    }
    // calculate the mean of the total errors
    offlineErrors = offlineErrors / folds;
    onlineErrors = onlineErrors / folds;
    return offlineErrors > onlineErrors;
}
From source file:machinelearningproject.MachineLearningProject.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("D:\\spambase.arff");
    // DataSource source = new DataSource("D:\\weather-nominal.arff");
    Instances instances = source.getDataSet();
    int numAttr = instances.numAttributes();
    instances.setClassIndex(instances.numAttributes() - 1);

    int runs = 5;
    int seed = 15;
    for (int i = 0; i < runs; i++) {
        // randomize data
        seed = seed + 1;                               // the seed for randomizing the data
        Random rand = new Random(seed);                // create seeded number generator
        Instances randData = new Instances(instances); // create copy of original data
        randData.randomize(rand);                      // shuffle with the seeded generator

        Evaluation evalDTree = new Evaluation(randData);
        Evaluation evalRF = new Evaluation(randData);
        Evaluation evalSVM = new Evaluation(randData);
        int folds = 10;
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n, rand);
            Instances test = randData.testCV(folds, n);

            // instantiate classifiers
            DecisionTree dtree = new DecisionTree();
            RandomForest rf = new RandomForest(100);
            SMO svm = new SMO();
            RBFKernel rbfKernel = new RBFKernel();
            double gamma = 0.70;
            rbfKernel.setGamma(gamma);
            svm.setKernel(rbfKernel); // attach the RBF kernel to the SMO

            dtree.buildClassifier(train);
            rf.buildClassifier(train);
            svm.buildClassifier(train);

            evalDTree.evaluateModel(dtree, test);
            evalRF.evaluateModel(rf, test);
            evalSVM.evaluateModel(svm, test);
        }

        System.out.println("=== Decision Tree Evaluation ===");
        System.out.println(evalDTree.toSummaryString());
        System.out.println(evalDTree.toClassDetailsString());
        System.out.println(evalDTree.toMatrixString());

        System.out.println("=== Random Forest Evaluation ===");
        System.out.println(evalRF.toSummaryString());
        System.out.println(evalRF.toClassDetailsString());
        System.out.println(evalRF.toMatrixString());

        System.out.println("=== SVM Evaluation ===");
        System.out.println(evalSVM.toSummaryString());
        System.out.println(evalSVM.toClassDetailsString());
        System.out.println(evalSVM.toMatrixString());
    }
}
From source file:mao.datamining.ModelProcess.java
private void testCV(Classifier classifier, Instances finalTrainDataSet, FileOutputStream testCaseSummaryOut,
        TestResult result) {
    long start, end, trainTime = 0, testTime = 0;
    Evaluation evalAll = null;
    double confusionMatrix[][] = null;

    // randomize the data, then stratify it into folds
    Random rand = new Random(1);
    Instances randData = new Instances(finalTrainDataSet);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        // always run with 10-fold cross-validation
        randData.stratify(folds);
    }

    try {
        evalAll = new Evaluation(randData);
        for (int i = 0; i < folds; i++) {
            Evaluation eval = new Evaluation(randData);
            Instances train = randData.trainCV(folds, i);
            Instances test = randData.testCV(folds, i);

            // measure training time
            start = System.currentTimeMillis();
            Classifier classifierCopy = Classifier.makeCopy(classifier);
            classifierCopy.buildClassifier(train);
            end = System.currentTimeMillis();
            trainTime += end - start;

            // measure test time
            start = System.currentTimeMillis();
            eval.evaluateModel(classifierCopy, test);
            evalAll.evaluateModel(classifierCopy, test);
            end = System.currentTimeMillis();
            testTime += end - start;
        }
    } catch (Exception e) {
        ModelProcess.logging(null, e);
    } // end of cross-validation

    // output evaluation
    try {
        ModelProcess.logging("");
        // write into the summary file
        testCaseSummaryOut.write((evalAll.toSummaryString("=== Cross Validation Summary ===", true)).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write(
                (evalAll.toClassDetailsString("=== " + folds + "-fold Cross-validation Class Detail ===\n")).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write((evalAll.toMatrixString("=== Confusion matrix for all folds ===\n")).getBytes());
        testCaseSummaryOut.flush();

        confusionMatrix = evalAll.confusionMatrix();
        result.setConfusionMatrix10Folds(confusionMatrix);
    } catch (Exception e) {
        ModelProcess.logging(null, e);
    }
}
From source file:meka.classifiers.multilabel.Evaluation.java
License:Open Source License
/**
 * CVModel - Split D into train/test folds, and then train and evaluate on each one.
 *
 * @param h        a multi-output classifier
 * @param D        the dataset Instances to be split into folds
 * @param numFolds number of folds of CV
 * @param top      Threshold OPtion (pertains to multi-label data only)
 * @param vop      Verbosity OPtion (which measures do we want to calculate/output)
 * @return Result raw prediction data with evaluation statistics included.
 */
public static Result cvModel(MultiLabelClassifier h, Instances D, int numFolds, String top, String vop)
        throws Exception {
    Result r_[] = new Result[numFolds];
    for (int i = 0; i < numFolds; i++) {
        Instances D_train = D.trainCV(numFolds, i);
        Instances D_test = D.testCV(numFolds, i);
        if (h.getDebug())
            System.out.println(":- Fold [" + i + "/" + numFolds + "] -: " + MLUtils.getDatasetName(D) + "\tL="
                    + D.classIndex() + "\tD(t:T)=(" + D_train.numInstances() + ":" + D_test.numInstances()
                    + ")\tLC(t:T)=" + Utils.roundDouble(MLUtils.labelCardinality(D_train, D.classIndex()), 2) + ":"
                    + Utils.roundDouble(MLUtils.labelCardinality(D_test, D.classIndex()), 2) + ")");
        r_[i] = evaluateModel(h, D_train, D_test); // <-- should not run stats yet!
    }
    Result r = MLEvalUtils.combinePredictions(r_);
    if (h instanceof MultiTargetClassifier || isMT(D)) {
        r.setInfo("Type", "MT-CV");
    } else if (h instanceof MultiLabelClassifier) {
        r.setInfo("Type", "ML-CV");
        try {
            r.setInfo("Threshold", String.valueOf(Double.parseDouble(top)));
        } catch (Exception e) {
            System.err.println("[WARNING] Automatic threshold calibration not currently enabled for cross-fold validation, setting threshold = 0.5.\n");
            r.setInfo("Threshold", String.valueOf(0.5));
        }
    }
    r.setInfo("Verbosity", vop);
    r.output = Result.getStats(r, vop); // need to reset this because of CV
    r.setValue("Number of training instances", D.numInstances());
    r.setValue("Number of test instances", D.numInstances());
    return r;
}
From source file:meka.classifiers.multilabel.Maniac.java
License:Open Source License
@Override
public Instances transformLabels(Instances D) throws Exception {
    // Scala-specific indirection needed to access "static" methods from Java
    org.kramerlab.autoencoder.package$ autoencoderStatics = org.kramerlab.autoencoder.package$.MODULE$;
    org.kramerlab.autoencoder.wekacompatibility.package$ wekaStatics = org.kramerlab.autoencoder.wekacompatibility.package$.MODULE$;
    org.kramerlab.autoencoder.experiments.package$ experimentsStatics = org.kramerlab.autoencoder.experiments.package$.MODULE$;

    int topiter = -1;

    // The optimization is a bit special: since we learn a stream of autoencoders,
    // there is no need to start from scratch, we just add layers.
    if (this.isOptimizeAE()) {
        Instances train = D.trainCV(3, 1);
        Instances test = D.testCV(3, 1);
        Instances labels = this.extractPart(train, true);

        // first convert the arff into non-sparse form
        SparseToNonSparse spfilter = new SparseToNonSparse();
        spfilter.setInputFormat(labels);
        Instances aeData = Filter.useFilter(labels, spfilter);

        // now convert it into a format suitable for the autoencoder
        Mat data = wekaStatics.instancesToMat(aeData);

        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons; Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / 2
                this.getCompression(),        // compression from k-th layer to (k+1)-th layer
                data,                         // training data
                true,                         // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(),
                true,
                autoencoderStatics.NoObservers());

        // test each autoencoder, select the best classifier
        double bestAccuracy = Double.NEGATIVE_INFINITY;
        int iteratorcount = 0;
        topiter = 0;
        for (Autoencoder a : autoencoders) {
            iteratorcount++;

            Maniac candidate = new Maniac();
            candidate.setOptimizeAE(false);
            candidate.setNumberAutoencoders(this.getNumberAutoencoders());
            candidate.setCompression(this.getCompression());
            candidate.setClassifier(this.getClassifier());
            candidate.setAE(a);

            Result res = Evaluation.evaluateModel(candidate, train, test);
            double curac = (Double) res.getValue("Accuracy");
            if (bestAccuracy < curac) {
                bestAccuracy = curac;
                topiter = iteratorcount;
            }
        }
    }

    Instances features = this.extractPart(D, false);
    Instances labels = this.extractPart(D, true);

    // first convert the arff into non-sparse form
    SparseToNonSparse spfilter = new SparseToNonSparse();
    spfilter.setInputFormat(labels);
    Instances aeData = Filter.useFilter(labels, spfilter);

    // now convert it into a format suitable for the autoencoder
    Mat data = wekaStatics.instancesToMat(aeData);

    if (this.getAE() == null) {
        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons; Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / 2
                this.getCompression(),        // compression from k-th layer to (k+1)-th layer
                data,                         // training data
                true,                         // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(),
                true,
                autoencoderStatics.NoObservers());
        int itercount = 0;
        for (Autoencoder a : autoencoders) {
            itercount++;
            if ((topiter > 0 && itercount == topiter) || itercount == this.getNumberAutoencoders()) {
                this.setAE(a);
                break;
            }
        }
    }

    Mat compressed = this.getAE().compress(data);
    Instances compressedLabels = wekaStatics.matToInstances(compressed);

    // remember the labels to use for the prediction step
    this.compressedTemplateInst = new Instances(compressedLabels);

    Instances result = Instances.mergeInstances(compressedLabels, features);
    result.setClassIndex(compressedLabels.numAttributes());
    return result;
}
From source file:meka.experiment.evaluators.CrossValidation.java
License:Open Source License
/**
 * Returns the evaluation statistics generated for the dataset (sequential execution).
 *
 * @param classifier the classifier to evaluate
 * @param dataset    the dataset to evaluate on
 * @return the statistics
 */
protected List<EvaluationStatistics> evaluateSequential(MultiLabelClassifier classifier, Instances dataset) {
    List<EvaluationStatistics> result;
    EvaluationStatistics stats;
    Instances train;
    Instances test;
    Result res;
    int i;
    Random rand;
    MultiLabelClassifier current;

    result = new ArrayList<>();
    rand = new Random(m_Seed);
    for (i = 1; i <= m_NumFolds; i++) {
        log("Fold: " + i);
        if (m_PreserveOrder)
            train = dataset.trainCV(m_NumFolds, i - 1);
        else
            train = dataset.trainCV(m_NumFolds, i - 1, rand);
        test = dataset.testCV(m_NumFolds, i - 1);
        try {
            current = (MultiLabelClassifier) OptionUtils.shallowCopy(classifier);
            res = Evaluation.evaluateModel(current, train, test, m_Threshold, m_Verbosity);
            stats = new EvaluationStatistics(classifier, dataset, res);
            stats.put(KEY_FOLD, i);
            result.add(stats);
        } catch (Exception e) {
            handleException("Failed to evaluate dataset '" + dataset.relationName() + "' with classifier: "
                    + Utils.toCommandLine(classifier), e);
            break;
        }
        if (m_Stopped)
            break;
    }

    if (m_Stopped)
        result.clear();

    return result;
}
From source file:meka.experiment.evaluators.CrossValidation.java
License:Open Source License
/**
 * Returns the evaluation statistics generated for the dataset (parallel execution).
 *
 * @param classifier the classifier to evaluate
 * @param dataset    the dataset to evaluate on
 * @return the statistics
 */
protected List<EvaluationStatistics> evaluateParallel(final MultiLabelClassifier classifier,
        final Instances dataset) {
    List<EvaluationStatistics> result;
    ArrayList<EvaluatorJob> jobs;
    EvaluatorJob job;
    int i;
    Random rand;

    result = new ArrayList<>();

    debug("pre: create jobs");
    jobs = new ArrayList<>();
    rand = new Random(m_Seed);
    for (i = 1; i <= m_NumFolds; i++) {
        final int index = i;
        final Instances train;
        final Instances test;
        final MultiLabelClassifier current;
        if (m_PreserveOrder)
            train = dataset.trainCV(m_NumFolds, index - 1);
        else
            train = dataset.trainCV(m_NumFolds, index - 1, rand);
        test = dataset.testCV(m_NumFolds, index - 1);
        current = (MultiLabelClassifier) OptionUtils.shallowCopy(classifier);
        job = new EvaluatorJob() {
            protected List<EvaluationStatistics> doCall() throws Exception {
                List<EvaluationStatistics> result = new ArrayList<>();
                log("Executing fold #" + index + "...");
                try {
                    Result res = Evaluation.evaluateModel(current, train, test, m_Threshold, m_Verbosity);
                    EvaluationStatistics stats = new EvaluationStatistics(classifier, dataset, res);
                    stats.put(KEY_FOLD, index);
                    result.add(stats);
                } catch (Exception e) {
                    handleException("Failed to evaluate dataset '" + dataset.relationName() + "' with classifier: "
                            + Utils.toCommandLine(classifier), e);
                }
                log("...finished fold #" + index);
                return result;
            }
        };
        jobs.add(job);
    }
    debug("post: create jobs");

    // execute jobs
    m_Executor = Executors.newFixedThreadPool(m_ActualNumThreads);
    debug("pre: submit");
    try {
        for (i = 0; i < jobs.size(); i++)
            m_Executor.submit(jobs.get(i));
    } catch (RejectedExecutionException e) {
        // ignored
    } catch (Exception e) {
        handleException("Failed to start up jobs", e);
    }
    debug("post: submit");

    debug("pre: shutdown");
    m_Executor.shutdown();
    debug("post: shutdown");

    // wait for threads to finish
    debug("pre: wait");
    while (!m_Executor.isTerminated()) {
        try {
            m_Executor.awaitTermination(100, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            // ignored
        } catch (Exception e) {
            handleException("Failed to await termination", e);
        }
    }
    debug("post: wait");

    // collect results
    debug("pre: collect");
    for (i = 0; i < jobs.size(); i++)
        result.addAll(jobs.get(i).getResult());
    debug("post: collect");

    return result;
}
From source file:mlpoc.MLPOC.java
public static Evaluation crossValidate(String filename) {
    Evaluation eval = null;
    try {
        // load data and set class index
        BufferedReader br = new BufferedReader(new FileReader(filename));
        Instances data = new Instances(br);
        br.close();
        /* CSV alternative:
        File csv = new File(filename);
        CSVLoader loader = new CSVLoader();
        loader.setSource(csv);
        Instances data = loader.getDataSet(); */
        data.setClassIndex(data.numAttributes() - 1);

        // classifier
        String classname = "weka.classifiers.trees.J48";
        String[] tmpOptions = { "-C", "0.25" };
        Classifier cls = (Classifier) Utils.forName(Classifier.class, classname, tmpOptions);

        // other options
        int seed = 2;
        int folds = 10;

        // randomize data
        Random rand = new Random(seed);
        Instances randData = new Instances(data);
        randData.randomize(rand);
        if (randData.classAttribute().isNominal())
            randData.stratify(folds);

        // perform cross-validation
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n);
            Instances test = randData.testCV(folds, n);
            // the above variant is used by the StratifiedRemoveFolds filter;
            // the Explorer/Experimenter uses this one instead:
            // Instances train = randData.trainCV(folds, n, rand);

            // build and evaluate classifier
            Classifier clsCopy = Classifier.makeCopy(cls);
            clsCopy.buildClassifier(train);
            eval.evaluateModel(clsCopy, test);
        }

        // output evaluation
        System.out.println();
        System.out.println("=== Setup ===");
        System.out.println("Classifier: " + cls.getClass().getName() + " " + Utils.joinOptions(cls.getOptions()));
        System.out.println("Dataset: " + data.relationName());
        System.out.println("Folds: " + folds);
        System.out.println("Seed: " + seed);
        System.out.println();
        System.out.println(eval.toSummaryString("Summary for testing", true));
        System.out.println("Correctly Classified Instances: " + eval.correct());
        System.out.println("Percentage of Correctly Classified Instances: " + eval.pctCorrect());
        System.out.println("Incorrectly Classified Instances: " + eval.incorrect());
        System.out.println("Percentage of Incorrectly Classified Instances: " + eval.pctIncorrect());
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
    }
    return eval;
}
From source file:moa.classifiers.AccuracyWeightedEnsemble.java
License:Open Source License
/**
 * Computes the weight of a candidate classifier.
 *
 * @param candidate Candidate classifier.
 * @param chunk     Data chunk of examples.
 * @param numFolds  Number of folds in candidate classifier cross-validation.
 * @return Candidate classifier weight.
 */
protected double computeCandidateWeight(Classifier candidate, Instances chunk, int numFolds) {
    double candidateWeight = 0.0;
    Random random = new Random(1);
    Instances randData = new Instances(chunk);
    randData.randomize(random);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(numFolds);
    }

    for (int n = 0; n < numFolds; n++) {
        Instances train = randData.trainCV(numFolds, n, random);
        Instances test = randData.testCV(numFolds, n);

        Classifier learner = candidate.copy();
        for (int num = 0; num < train.numInstances(); num++) {
            learner.trainOnInstance(train.instance(num));
        }
        candidateWeight += computeWeight(learner, test);
    }

    double resultWeight = candidateWeight / numFolds;
    if (Double.isInfinite(resultWeight)) {
        return Double.MAX_VALUE;
    } else {
        return resultWeight;
    }
}
From source file:mulan.data.LabelPowersetStratification.java
License:Open Source License
public MultiLabelInstances[] stratify(MultiLabelInstances data, int folds) {
    try {
        MultiLabelInstances[] segments = new MultiLabelInstances[folds];
        LabelPowersetTransformation transformation = new LabelPowersetTransformation();
        Instances transformed;

        // transform to single-label
        transformed = transformation.transformInstances(data);

        // add an instance id attribute
        Add add = new Add();
        add.setAttributeIndex("first");
        add.setAttributeName("instanceID");
        add.setInputFormat(transformed);
        transformed = Filter.useFilter(transformed, add);
        for (int i = 0; i < transformed.numInstances(); i++) {
            transformed.instance(i).setValue(0, i);
        }
        transformed.setClassIndex(transformed.numAttributes() - 1);

        // stratify
        transformed.randomize(new Random(seed));
        transformed.stratify(folds);

        for (int i = 0; i < folds; i++) {
            Instances temp = transformed.testCV(folds, i);
            Instances test = new Instances(data.getDataSet(), 0);
            for (int j = 0; j < temp.numInstances(); j++) {
                // map back to the original multi-label instances via the id attribute
                test.add(data.getDataSet().instance((int) temp.instance(j).value(0)));
            }
            segments[i] = new MultiLabelInstances(test, data.getLabelsMetaData());
        }
        return segments;
    } catch (Exception ex) {
        Logger.getLogger(LabelPowersetStratification.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}