List of usage examples for weka.core Instances randomize
public void randomize(Random random)
Shuffles the instances in the set so that they are ordered randomly, using the given random number generator.
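Before the project examples below, here is a minimal, self-contained sketch of the typical call pattern (the file name, seed, and 80/20 split ratio are illustrative assumptions, not taken from the sources below): randomize shuffles the instances in place, and a seeded Random makes the shuffle reproducible.

import java.util.Random;

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class RandomizeExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset (the file name is a placeholder)
        Instances data = new DataSource("data.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // Shuffle in place; a fixed seed makes the shuffle reproducible
        data.randomize(new Random(42));

        // A typical follow-up, as in the examples below: an 80/20 train/test split
        int trainSize = (int) Math.round(data.numInstances() * 0.8);
        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, data.numInstances() - trainSize);
        System.out.println("Train: " + train.numInstances() + ", Test: " + test.numInstances());
    }
}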
From source file: knnclassifier.Main.java

public static void main(String[] args) throws Exception {
    // 'file' is a field defined elsewhere in this class (the dataset path)
    DataSource source = new DataSource(file);
    Instances dataSet = source.getDataSet();

    // Set up the data
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random());

    // 70/30 train/test split of the shuffled data
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;
    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    // Standardize both sets; the training set is filtered first because a batch
    // filter computes its statistics from the first batch it processes
    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(training);
    Instances newTraining = Filter.useFilter(training, standardizedData);
    Instances newTest = Filter.useFilter(test, standardizedData);

    KNNClassifier knn = new KNNClassifier();
    knn.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(knn, newTest);
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}
From source file: liac.igmn.evaluation.Evaluator.java
License: Open Source License

public void crossValidation(IGMN model, Dataset dataset, int numFolds, int runs, boolean randomize) {
    confusionMatrix = new ConfusionMatrix(dataset.getClassesNames());
    Instances instances = dataset.getWekaDataset();
    int seed = 1;
    for (int run = 0; run < runs; run++) {
        // Reshuffle with a fresh seed at the start of each run
        if (randomize) {
            instances.randomize(new Random(seed));
            seed += 1;
        }
        if (verbose)
            System.out.println("RUN: " + (run + 1));
        for (int n = 0; n < numFolds; n++) {
            Instances train = instances.trainCV(numFolds, n);
            Instances test = instances.testCV(numFolds, n);
            SimpleMatrix trainData = MatrixUtil.instancesToMatrix(train);
            SimpleMatrix testData = MatrixUtil.instancesToMatrix(test);
            model.reset();
            if (verbose)
                System.out.println("TRAINING FOLD: " + (n + 1));
            model.train(trainData);
            if (verbose)
                System.out.println("TESTING...");
            SimpleMatrix testInputs = testData.extractMatrix(0, dataset.getInputSize(), 0, SimpleMatrix.END);
            SimpleMatrix testTargets = testData.extractMatrix(dataset.getInputSize(), dataset.getNumAttributes(), 0, SimpleMatrix.END);
            // Compare predicted and true class (index of the maximum element) per test column
            for (int i = 0; i < testInputs.numCols(); i++) {
                SimpleMatrix y = model.classify(testInputs.extractVector(false, i));
                SimpleMatrix target = testTargets.extractVector(false, i);
                int tInd = MatrixUtil.maxElementIndex(target);
                int yInd = MatrixUtil.maxElementIndex(y);
                confusionMatrix.addPrediction(tInd, yInd);
            }
        }
    }
    // Average the accumulated counts over the number of runs
    confusionMatrix.set(confusionMatrix.divide(runs));
}
From source file: machinelearningcw.EnhancedLinearPerceptron.java

public boolean crossValidation(Instances ins) throws Exception {
    // Copy the data so the caller's Instances are not reordered
    Instances data = new Instances(ins);
    Instances train; // the new training data
    Instances test;  // the new testing data
    int seed = 0;
    Random rand = new Random(seed);
    // Randomize the data
    data.randomize(rand);
    // Number of folds
    int folds = 10;
    int offlineErrors = 0;
    int onlineErrors = 0;
    for (int i = 0; i < folds; i++) {
        train = data.trainCV(folds, i);
        test = data.testCV(folds, i);
        // Add to the total errors for each fold.
        // NOTE: the offline model is presumably meant to be built by the
        // commented-out call below; as written, classifyInstance uses
        // whatever weights are currently stored in w.
        //offlineErrors += offlinePerceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                offlineErrors += 1;
            }
        }
        // Reset w, then train the online perceptron
        Arrays.fill(w, 1);
        perceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                onlineErrors += 1;
            }
        }
    }
    // System.out.println(" off: " + offlineErrors);
    // System.out.println(" on: " + onlineErrors);

    // Mean errors per fold (integer division, so the result is truncated)
    offlineErrors = offlineErrors / folds;
    onlineErrors = onlineErrors / folds;
    // System.out.println(flag);
    return offlineErrors > onlineErrors;
}
From source file: mao.datamining.ModelProcess.java

private void testCV(Classifier classifier, Instances finalTrainDataSet,
        FileOutputStream testCaseSummaryOut, TestResult result) {
    long start, end, trainTime = 0, testTime = 0;
    Evaluation evalAll = null;
    double confusionMatrix[][] = null;

    // Randomize the data, then stratify it into folds
    Random rand = new Random(1);
    Instances randData = new Instances(finalTrainDataSet);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        // always run with 10-fold cross-validation
        randData.stratify(folds);
    }

    try {
        evalAll = new Evaluation(randData);
        for (int i = 0; i < folds; i++) {
            Evaluation eval = new Evaluation(randData);
            Instances train = randData.trainCV(folds, i);
            Instances test = randData.testCV(folds, i);

            // Time the training phase
            start = System.currentTimeMillis();
            Classifier j48ClassifierCopy = Classifier.makeCopy(classifier);
            j48ClassifierCopy.buildClassifier(train);
            end = System.currentTimeMillis();
            trainTime += end - start;

            // Time the test phase
            start = System.currentTimeMillis();
            eval.evaluateModel(j48ClassifierCopy, test);
            evalAll.evaluateModel(j48ClassifierCopy, test);
            end = System.currentTimeMillis();
            testTime += end - start;
        }
    } catch (Exception e) {
        ModelProcess.logging(null, e);
    } // end test by cross-validation

    // Output evaluation
    try {
        ModelProcess.logging("");
        // Write into the summary file
        testCaseSummaryOut.write((evalAll.toSummaryString("=== Cross Validation Summary ===", true)).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write((evalAll.toClassDetailsString("=== " + folds + "-fold Cross-validation Class Detail ===\n")).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write((evalAll.toMatrixString("=== Confusion matrix for all folds ===\n")).getBytes());
        testCaseSummaryOut.flush();

        confusionMatrix = evalAll.confusionMatrix();
        result.setConfusionMatrix10Folds(confusionMatrix);
    } catch (Exception e) {
        ModelProcess.logging(null, e);
    }
}
From source file: meka.classifiers.multilabel.BRq.java
License: Open Source License

@Override
public void buildClassifier(Instances data) throws Exception {
    testCapabilities(data);
    int c = data.classIndex();
    if (getDebug())
        System.out.print("-: Creating " + c + " models (" + m_Classifier.getClass().getName() + "): ");
    m_MultiClassifiers = AbstractClassifier.makeCopies(m_Classifier, c);
    Instances sub_data = null;
    for (int i = 0; i < c; i++) {
        int indices[][] = new int[c][c - 1];
        for (int j = 0, k = 0; j < c; j++) {
            if (j != i) {
                indices[i][k++] = j;
            }
        }
        // Select only class attribute 'i'
        Remove FilterRemove = new Remove();
        FilterRemove.setAttributeIndicesArray(indices[i]);
        FilterRemove.setInputFormat(data);
        FilterRemove.setInvertSelection(true);
        sub_data = Filter.useFilter(data, FilterRemove);
        sub_data.setClassIndex(0);
        /* BEGIN downsample for this link */
        sub_data.randomize(m_Random);
        int numToRemove = sub_data.numInstances() - (int) Math.round(sub_data.numInstances() * m_DownSampleRatio);
        for (int m = 0, removed = 0; m < sub_data.numInstances(); m++) {
            if (sub_data.instance(m).classValue() <= 0.0) {
                sub_data.instance(m).setClassMissing();
                if (++removed >= numToRemove)
                    break;
            }
        }
        sub_data.deleteWithMissingClass();
        /* END downsample for this link */
        // Build the classifier for that class
        m_MultiClassifiers[i].buildClassifier(sub_data);
        if (getDebug())
            System.out.print(" " + (i + 1));
    }
    if (getDebug())
        System.out.println(" :-");
    m_InstancesTemplate = new Instances(sub_data, 0);
}
From source file: meka.classifiers.multilabel.Evaluation.java
License: Open Source License

/**
 * RunExperiment - Build and evaluate a model with command-line options.
 * @param h multi-label classifier
 * @param options command line options
 */
public static void runExperiment(MultiLabelClassifier h, String options[]) throws Exception {

    // Help
    if (Utils.getOptionPos('h', options) >= 0) {
        System.out.println("\nHelp requested");
        Evaluation.printOptions(h.listOptions());
        return;
    }

    h.setOptions(options);

    if (h.getDebug())
        System.out.println("Loading and preparing dataset ...");

    // Load Instances from a file
    Instances D_train = loadDataset(options);
    Instances D_full = D_train;

    // Try to extract and set a class index from the @relation name
    MLUtils.prepareData(D_train);

    // Override the number of classes with a command-line option (optional)
    if (Utils.getOptionPos('C', options) >= 0) {
        int L = Integer.parseInt(Utils.getOption('C', options));
        D_train.setClassIndex(L);
    }

    // If we still haven't found the -C option, we can't continue (we don't know how many labels there are)
    int L = D_train.classIndex();
    if (L <= 0) {
        // apparently the dataset didn't contain the '-C' flag; check the command-line options
        throw new Exception("[Error] Number of labels not specified.\n\tYou must set the number of labels with the -C option, either inside the @relation tag of the Instances file, or on the command line.");
    }

    // Randomize (Instances)
    int seed = (Utils.getOptionPos('s', options) >= 0) ? Integer.parseInt(Utils.getOption('s', options)) : 0;
    if (Utils.getFlag('R', options)) {
        D_train.randomize(new Random(seed));
    }

    boolean Threaded = false;
    if (Utils.getOptionPos("Thr", options) >= 0) {
        Threaded = Utils.getFlag("Thr", options);
    }

    // Verbosity option
    String voption = "1";
    if (Utils.getOptionPos("verbosity", options) >= 0) {
        voption = Utils.getOption("verbosity", options);
    }

    // Save for later?
    //String fname = null;
    //if (Utils.getOptionPos('f', options) >= 0) {
    //    fname = Utils.getOption('f', options);
    //}

    // Dump for later?
    String dname = null;
    if (Utils.getOptionPos('d', options) >= 0) {
        dname = Utils.getOption('d', options);
    }

    // Load from file?
    String lname = null;
    Instances dataHeader = null;
    if (Utils.getOptionPos('l', options) >= 0) {
        lname = Utils.getOption('l', options);
        Object[] data = SerializationHelper.readAll(lname);
        h = (MultiLabelClassifier) data[0];
        if (data.length > 1)
            dataHeader = (Instances) data[1];
    }

    try {
        Result r = null;

        // Threshold option
        String top = "PCut1"; // default
        if (Utils.getOptionPos("threshold", options) >= 0)
            top = Utils.getOption("threshold", options);

        if (Utils.getOptionPos('x', options) >= 0) {
            // CROSS-FOLD-VALIDATION
            int numFolds = MLUtils.getIntegerOption(Utils.getOption('x', options), 10); // default 10
            // Check for remaining options
            Utils.checkForRemainingOptions(options);
            r = Evaluation.cvModel(h, D_train, numFolds, top, voption);
            System.out.println(r.toString());
        } else {
            // TRAIN-TEST SPLIT
            Instances D_test = null;
            if (Utils.getOptionPos('T', options) >= 0) {
                // load a separate test set
                try {
                    D_test = loadDataset(options, 'T');
                    MLUtils.prepareData(D_test);
                } catch (Exception e) {
                    throw new Exception("[Error] Failed to Load Test Instances from file.", e);
                }
            } else {
                // split the training set into train and test sets
                // default split
                int N_T = (int) (D_train.numInstances() * 0.60);
                if (Utils.getOptionPos("split-percentage", options) >= 0) {
                    // split by percentage
                    double percentTrain = Double.parseDouble(Utils.getOption("split-percentage", options));
                    N_T = (int) Math.round((D_train.numInstances() * (percentTrain / 100.0)));
                } else if (Utils.getOptionPos("split-number", options) >= 0) {
                    // split by number
                    N_T = Integer.parseInt(Utils.getOption("split-number", options));
                }
                int N_t = D_train.numInstances() - N_T;
                D_test = new Instances(D_train, N_T, N_t);
                D_train = new Instances(D_train, 0, N_T);
            }

            // Invert the split?
            if (Utils.getFlag('i', options)) {
                Instances temp = D_test;
                D_test = D_train;
                D_train = temp;
            }

            // Check for remaining options
            Utils.checkForRemainingOptions(options);

            if (h.getDebug())
                System.out.println(":- Dataset -: " + MLUtils.getDatasetName(D_train) + "\tL=" + L + "\tD(t:T)=("
                        + D_train.numInstances() + ":" + D_test.numInstances() + ")\tLC(t:T)="
                        + Utils.roundDouble(MLUtils.labelCardinality(D_train, L), 2) + ":"
                        + Utils.roundDouble(MLUtils.labelCardinality(D_test, L), 2) + ")");

            if (lname != null) {
                // h is already built (loaded from a file), so just test it
                r = testClassifier(h, D_test);
                String t = top;
                if (top.startsWith("PCut")) {
                    // if PCut is specified we need the training data,
                    // so that we can calibrate the threshold!
                    t = MLEvalUtils.getThreshold(r.predictions, D_train, top);
                }
                r = evaluateModel(h, D_test, t, voption);
            } else {
                // check that the train and test set sizes are > 0
                if (D_train.numInstances() > 0 && D_test.numInstances() > 0) {
                    if (Threaded) {
                        r = evaluateModelM(h, D_train, D_test, top, voption);
                    } else {
                        r = evaluateModel(h, D_train, D_test, top, voption);
                    }
                } else {
                    // otherwise just train on the full set (maybe better to throw an exception)
                    h.buildClassifier(D_full);
                }
            }

            // @todo, if D_train==null, assume h is already trained
            if (D_train.numInstances() > 0 && D_test.numInstances() > 0) {
                System.out.println(r.toString());
            }
        }

        // Save model to file?
        if (dname != null) {
            dataHeader = new Instances(D_train, 0);
            SerializationHelper.writeAll(dname, new Object[] { h, dataHeader });
        }

    } catch (Exception e) {
        e.printStackTrace();
        Evaluation.printOptions(h.listOptions());
        System.exit(1);
    }

    System.exit(0);
}
From source file: meka.classifiers.multilabel.meta.EnsembleML.java
License: Open Source License

@Override
public void buildClassifier(Instances train) throws Exception {
    testCapabilities(train);
    if (getDebug())
        System.out.print("-: Models: ");
    train = new Instances(train);
    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier, m_NumIterations);
    int sub_size = (train.numInstances() * m_BagSizePercent / 100);
    for (int i = 0; i < m_NumIterations; i++) {
        if (getDebug())
            System.out.print("" + i + " ");
        if (m_Classifiers[i] instanceof Randomizable)
            ((Randomizable) m_Classifiers[i]).setSeed(i);
        // Reshuffle, then take the first sub_size instances as this ensemble member's bag
        train.randomize(new Random(m_Seed + i));
        Instances sub_train = new Instances(train, 0, sub_size);
        m_Classifiers[i].buildClassifier(sub_train);
    }
    if (getDebug())
        System.out.println(":-");
}
From source file: meka.classifiers.multilabel.meta.RandomSubspaceML.java
License: Open Source License

@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);
    m_InstancesTemplates = new Instances[m_NumIterations];
    m_InstanceTemplates = new Instance[m_NumIterations];
    if (getDebug())
        System.out.println("-: Models: ");
    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier, m_NumIterations);
    Random r = new Random(m_Seed);
    int N_sub = (D.numInstances() * m_BagSizePercent / 100);
    int L = D.classIndex();
    int d = D.numAttributes() - L;
    int d_new = d * m_AttSizePercent / 100;
    m_IndicesCut = new int[m_NumIterations][];
    for (int i = 0; i < m_NumIterations; i++) {
        // Downsize the instance space (exactly like in EnsembleML.java)
        if (getDebug())
            System.out.print("\t" + (i + 1) + ": ");
        D.randomize(r);
        Instances D_cut = new Instances(D, 0, N_sub);
        if (getDebug())
            System.out.print("N=" + D.numInstances() + " -> N'=" + D_cut.numInstances() + ", ");
        // Downsize the attribute space
        D_cut.setClassIndex(-1);
        int indices_a[] = A.make_sequence(L, d + L);
        A.shuffle(indices_a, r);
        indices_a = Arrays.copyOfRange(indices_a, 0, d - d_new);
        Arrays.sort(indices_a);
        m_IndicesCut[i] = A.invert(indices_a, D.numAttributes());
        D_cut = F.remove(D_cut, indices_a, false);
        D_cut.setClassIndex(L);
        if (getDebug())
            System.out.print(" A:=" + (D.numAttributes() - L) + " -> A'=" + (D_cut.numAttributes() - L) + " ("
                    + m_IndicesCut[i][L] + ",...," + m_IndicesCut[i][m_IndicesCut[i].length - 1] + ")");
        // Train the multi-label classifier
        if (m_Classifiers[i] instanceof Randomizable)
            ((Randomizable) m_Classifiers[i]).setSeed(m_Seed + i);
        if (getDebug())
            System.out.println(".");
        m_Classifiers[i].buildClassifier(D_cut);
        m_InstanceTemplates[i] = D_cut.instance(1);
        m_InstancesTemplates[i] = new Instances(D_cut, 0);
    }
    if (getDebug())
        System.out.println(":-");
}
From source file: meka.classifiers.multitarget.SCC.java
License: Open Source License

@Override
public void buildClassifier(Instances D) throws Exception {

    int N = D.numInstances(); // only for printouts
    int U = MLUtils.numberOfUniqueCombinations(D); // only for printouts
    int L = D.classIndex();
    rand = new Random(m_S);

    if (!(m_Classifier instanceof MultiTargetClassifier)) {
        throw new Exception("[Error] The base classifier must be multi-target capable, i.e., from meka.classifiers.multitarget.");
    }

    // 0. SPLIT INTO TRAIN AND VALIDATION SET/S
    Instances D_r = new Instances(D);
    D_r.randomize(rand);
    Instances D_train = new Instances(D_r, 0, D_r.numInstances() * i_SPLIT / 100);
    Instances D_test = new Instances(D_r, D_train.numInstances(), D_r.numInstances() - D_train.numInstances());

    // 1. BUILD BR or EBR
    if (getDebug())
        System.out.print("1. BUILD & Evaluate BR: ");
    CR cr = new CR();
    cr.setClassifier(((ProblemTransformationMethod) m_Classifier).getClassifier()); // assume PT
    Result result_1 = Evaluation.evaluateModel((ProblemTransformationMethod) cr, D_train, D_test, "PCut1", "5");
    double acc1 = (Double) result_1.getMeasurement(i_ErrFn);
    if (getDebug())
        System.out.println(" " + acc1);

    int partition[][] = SuperLabelUtils.generatePartition(A.make_sequence(L), rand);

    // 2. SELECT / MODIFY INDICES (using the LEAD technique)
    if (getDebug())
        System.out.println("2. GET ERR-CHI-SQUARED MATRIX: ");
    double MER[][] = StatUtils.condDepMatrix(D_test, result_1);
    if (getDebug())
        System.out.println(MatrixUtils.toString(MER));

    /*
     * 3. SIMULATED ANNEALING
     * Always accept if best, progressively less likely to accept otherwise.
     */
    if (getDebug())
        System.out.println("3. COMBINE NODES TO FIND THE BEST COMBINATION ACCORDING TO CHI");
    double w = rating(partition, MER);
    if (getDebug())
        System.out.println("@0 : " + SuperLabelUtils.toString(partition) + "\t(" + w + ")");

    for (int i = 0; i < m_I; i++) {
        int partition_[][] = mutateCombinations(MatrixUtils.deep_copy(partition), rand);
        double w_ = rating(partition_, MER); // this is really p_MER(partition_)
        if (w_ > w) {
            // ACCEPT
            partition = partition_;
            w = w_;
            if (getDebug())
                System.out.println("@" + i + " : " + SuperLabelUtils.toString(partition) + "\t(" + w + ")");
        } else {
            // MAYBE ACCEPT
            double diff = Math.abs(w_ - w);
            double p = (2. * (1. - sigma(diff * i / 1000.)));
            if (p > rand.nextDouble()) {
                // OK, ACCEPT NOW
                if (getDebug())
                    System.out.println("@" + i + " : " + SuperLabelUtils.toString(partition_) + "\t(" + w_ + ")*");
                partition = partition_;
                w = w_;
            }
        }
    }

    /*
     * METHOD 2
     * Refine the set we started with above, with a few iterations.
     * We mutate a set, and accept whenever the classification performance is GREATER.
     */
    if (m_Iv > 0) {
        if (getDebug())
            System.out.println("4. REFINING THE INITIAL SET WITH SOME OLD-FASHIONED INTERNAL EVAL");
        // Build & evaluate the classifier with the latest partition
        result_1 = testClassifier((ProblemTransformationMethod) m_Classifier, D_train, D_test, partition);
        w = (Double) result_1.getMeasurement(i_ErrFn);
        if (getDebug())
            System.out.println("@0 : " + SuperLabelUtils.toString(partition) + "\t(" + w + ")");
        for (int i = 0; i < m_Iv; i++) {
            int partition_[][] = mutateCombinations(MatrixUtils.deep_copy(partition), rand);
            // Build the classifier with the new combination
            trainClassifier(m_Classifier, D_train, partition);
            // Evaluate on D_test
            Result result_2 = testClassifier((ProblemTransformationMethod) m_Classifier, D_train, D_test, partition_);
            double w_ = (Double) result_2.getMeasurement(i_ErrFn);
            if (w_ > w) {
                w = w_;
                partition = partition_;
                if (getDebug())
                    System.out.println("@" + (i + 1) + "' : " + SuperLabelUtils.toString(partition) + "\t(" + w + ")");
            }
        }
    }

    // 4. DECIDE HOW GOOD THEY ARE; COMPARE EACH LABEL TO THE BR RESULT?
    if (getDebug())
        System.out.println("4. TRAIN " + SuperLabelUtils.toString(partition));
    trainClassifier(m_Classifier, D, partition);

    if (getDebug()) {
        //System.out.println("E_acc P " + m_P + " " + (mt.m_InstancesTemplate.numInstances() / (double) N) + " " + (MLUtils.numberOfUniqueCombinations(mt.m_InstancesTemplate) / (double) U));
    }

    // 5. MOVE ON ...
}
From source file: meka.core.StatUtils.java
License: Open Source License

/**
 * LEAD - Performs LEAD on dataset 'D', using BR with base classifier 'h', under random seed 'r'.
 * <br>
 * WARNING: changing this method will affect the performance of e.g., BCC -- on the other hand the original BCC paper did not use LEAD, so don't worry.
 */
public static double[][] LEAD(Instances D, Classifier h, Random r) throws Exception {
    // Shuffle, then make a 60/40 train/test split
    Instances D_r = new Instances(D);
    D_r.randomize(r);
    Instances D_train = new Instances(D_r, 0, D_r.numInstances() * 60 / 100);
    Instances D_test = new Instances(D_r, D_train.numInstances(), D_r.numInstances() - D_train.numInstances());
    // Train BR on the train split and measure its errors on the test split
    BR br = new BR();
    br.setClassifier(h);
    Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, D_train, D_test, "PCut1", "1");
    return LEAD2(D_test, result);
}