List of usage examples for weka.core.Instances.randomize
public void randomize(Random random)
From source file:br.unicamp.ic.recod.gpsi.gp.gpsiJGAPRoiFitnessFunction.java
/**
 * Computes the fitness of a GP individual as a cross-validated classification accuracy.
 *
 * <p>The training entities of {@code this.dataset} are loaded through a {@code gpsiMLDataset}
 * built on {@code this.descriptor}, converted into a Weka {@code Instances} set (one numeric
 * attribute per feature plus one nominal class attribute), shuffled with an unseeded
 * {@link Random}, and scored with 5-fold cross-validation of {@code SimpleLogistic}.
 *
 * <p>Failures while loading the dataset or while training/evaluating are logged at
 * {@code SEVERE} and do not propagate. Because the whole fold loop sits inside one
 * {@code try}, the first failing fold aborts the remaining folds, which then contribute
 * nothing to the sum.
 *
 * @param igpp the GP program being evaluated
 * @return the sum of the per-fold {@code pctCorrect()} values divided by {@code folds * 100}
 */
@Override
protected double evaluate(IGPProgram igpp) {

    double mean_accuracy = 0.0;

    // NOTE(review): the combiner is constructed but never applied (see the TODO below), so in
    // the visible code the returned value does not depend on igpp. Kept in case the
    // constructors have side effects.
    gpsiRoiBandCombiner roiBandCombinator = new gpsiRoiBandCombiner(new gpsiJGAPVoxelCombiner(super.b, igpp));
    // TODO: The ROI descriptors must combine the images first
    //roiBandCombinator.combineEntity(this.dataset.getTrainingEntities());

    gpsiMLDataset mlDataset = new gpsiMLDataset(this.descriptor);
    try {
        mlDataset.loadWholeDataset(this.dataset, true);
    } catch (Exception ex) {
        Logger.getLogger(gpsiJGAPRoiFitnessFunction.class.getName()).log(Level.SEVERE, null, ex);
    }

    int dimensionality = mlDataset.getDimensionality();
    int n_classes = mlDataset.getTrainingEntities().keySet().size();
    int n_entities = mlDataset.getNumberOfTrainingEntities();
    ArrayList<Byte> listOfClasses = new ArrayList<>(mlDataset.getTrainingEntities().keySet());

    // One numeric attribute per feature, named f0, f1, ...
    Attribute[] attributes = new Attribute[dimensionality];
    for (int i = 0; i < dimensionality; i++) {
        attributes[i] = new Attribute("f" + Integer.toString(i));
    }

    // Nominal class attribute whose values are the decimal strings of the label bytes.
    FastVector fvClassVal = new FastVector(n_classes);
    for (int i = 0; i < n_classes; i++) {
        fvClassVal.addElement(Integer.toString(listOfClasses.get(i)));
    }
    Attribute classes = new Attribute("class", fvClassVal);

    FastVector fvWekaAttributes = new FastVector(dimensionality + 1);
    for (int i = 0; i < dimensionality; i++) {
        fvWekaAttributes.addElement(attributes[i]);
    }
    fvWekaAttributes.addElement(classes);

    Instances instances = new Instances("Rel", fvWekaAttributes, n_entities);
    instances.setClassIndex(dimensionality);

    Instance iExample;
    for (byte label : mlDataset.getTrainingEntities().keySet()) {
        for (double[] featureVector : mlDataset.getTrainingEntities().get(label)) {
            iExample = new Instance(dimensionality + 1);
            // Copy every feature. The index must be the loop variable j: the previous code
            // indexed with a stale counter equal to dimensionality, which is out of range
            // for a feature vector of that length.
            for (int j = 0; j < dimensionality; j++) {
                iExample.setValue(j, featureVector[j]);
            }
            // NOTE(review): the raw label byte is stored as the class value. Weka's
            // setValue(int, double) takes the internal format, which for a nominal attribute
            // is presumably the index into the declared values - confirm that labels are
            // 0..n_classes-1 in listOfClasses order.
            iExample.setValue(dimensionality, label);
            instances.add(iExample);
        }
    }

    int folds = 5;
    Random rand = new Random();
    Instances randData = new Instances(instances);
    randData.randomize(rand);

    Instances trainingSet, testingSet;
    Classifier cModel;
    Evaluation eTest;
    try {
        for (int i = 0; i < folds; i++) {
            cModel = (Classifier) new SimpleLogistic();
            trainingSet = randData.trainCV(folds, i);
            testingSet = randData.testCV(folds, i);

            cModel.buildClassifier(trainingSet);

            eTest = new Evaluation(trainingSet);
            eTest.evaluateModel(cModel, testingSet);

            mean_accuracy += eTest.pctCorrect();
        }
    } catch (Exception ex) {
        Logger.getLogger(gpsiJGAPRoiFitnessFunction.class.getName()).log(Level.SEVERE, null, ex);
    }

    // pctCorrect() is a percentage, hence the extra factor of 100.
    mean_accuracy /= (folds * 100);

    return mean_accuracy;
}
From source file:c4.pkg5crossv.Classifier.java
public static void trainAndTest() throws FileNotFoundException, IOException, Exception { Instances data = DataLoad.loadData("./src/data/irysy.arff"); data.setClassIndex(data.numAttributes() - 1); //Losowy podzial tablicy data.randomize(new Random()); double percent = 60.0; int trainSize = (int) Math.round(data.numInstances() * percent / 100); int testSize = data.numInstances() - trainSize; Instances trainData = new Instances(data, 0, trainSize); Instances testData = new Instances(data, trainSize, testSize); String[] options = Utils.splitOptions("-U -M 10"); J48 tree = new J48(); tree.setOptions(options);// w w w . j a va 2 s . c om tree.buildClassifier(trainData); Evaluation eval2 = new Evaluation(trainData); eval2.crossValidateModel(tree, testData, 10, new Random(1)); // 5 - fold System.out.println(eval2.toSummaryString("Wyniki:", false)); //Wypisanie testovania cross validation }
From source file:cezeri.evaluater.FactoryEvaluation.java
public static Evaluation performCrossValidate(Classifier model, Instances datax, int folds, boolean show_text, boolean show_plot, TFigureAttribute attr) { Random rand = new Random(1); Instances randData = new Instances(datax); randData.randomize(rand); if (randData.classAttribute().isNominal()) { randData.stratify(folds);// www . jav a 2s . c o m } Evaluation eval = null; try { // perform cross-validation eval = new Evaluation(randData); // double[] simulated = new double[0]; // double[] observed = new double[0]; // double[] sim = new double[0]; // double[] obs = new double[0]; for (int n = 0; n < folds; n++) { Instances train = randData.trainCV(folds, n, rand); Instances validation = randData.testCV(folds, n); // build and evaluate classifier Classifier clsCopy = Classifier.makeCopy(model); clsCopy.buildClassifier(train); // sim = eval.evaluateModel(clsCopy, validation); // obs = validation.attributeToDoubleArray(validation.classIndex()); // if (show_plot) { // double[][] d = new double[2][sim.length]; // d[0] = obs; // d[1] = sim; // CMatrix f1 = CMatrix.getInstance(d); // f1.transpose().plot(attr); // } // if (show_text) { // // output evaluation // System.out.println(); // System.out.println("=== Setup for each Cross Validation fold==="); // System.out.println("Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions())); // System.out.println("Dataset: " + randData.relationName()); // System.out.println("Folds: " + folds); // System.out.println("Seed: " + 1); // System.out.println(); // System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false)); // } simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation)); observed = FactoryUtils.concatenate(observed, validation.attributeToDoubleArray(validation.classIndex())); // simulated = FactoryUtils.mean(simulated,eval.evaluateModel(clsCopy, validation)); // observed = 
FactoryUtils.mean(observed,validation.attributeToDoubleArray(validation.classIndex())); } if (show_plot) { double[][] d = new double[2][simulated.length]; d[0] = observed; d[1] = simulated; CMatrix f1 = CMatrix.getInstance(d); attr.figureCaption = "overall performance"; f1.transpose().plot(attr); } if (show_text) { // output evaluation System.out.println(); System.out.println("=== Setup for Overall Cross Validation==="); System.out.println( "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions())); System.out.println("Dataset: " + randData.relationName()); System.out.println("Folds: " + folds); System.out.println("Seed: " + 1); System.out.println(); System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false)); } } catch (Exception ex) { Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex); } return eval; }
From source file:cezeri.evaluater.FactoryEvaluation.java
public static Evaluation performCrossValidateTestAlso(Classifier model, Instances datax, Instances test, boolean show_text, boolean show_plot) { TFigureAttribute attr = new TFigureAttribute(); Random rand = new Random(1); Instances randData = new Instances(datax); randData.randomize(rand); Evaluation eval = null;/* w w w . ja v a 2 s . c om*/ int folds = randData.numInstances(); try { eval = new Evaluation(randData); for (int n = 0; n < folds; n++) { // randData.randomize(rand); // Instances train = randData; Instances train = randData.trainCV(folds, n); // Instances train = randData.trainCV(folds, n, rand); Classifier clsCopy = Classifier.makeCopy(model); clsCopy.buildClassifier(train); Instances validation = randData.testCV(folds, n); // Instances validation = test.testCV(test.numInstances(), n%test.numInstances()); // CMatrix.fromInstances(train).showDataGrid(); // CMatrix.fromInstances(validation).showDataGrid(); simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation)); observed = FactoryUtils.concatenate(observed, validation.attributeToDoubleArray(validation.classIndex())); } if (show_plot) { double[][] d = new double[2][simulated.length]; d[0] = observed; d[1] = simulated; CMatrix f1 = CMatrix.getInstance(d); attr.figureCaption = "overall performance"; f1.transpose().plot(attr); } if (show_text) { // output evaluation System.out.println(); System.out.println("=== Setup for Overall Cross Validation==="); System.out.println( "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions())); System.out.println("Dataset: " + randData.relationName()); System.out.println("Folds: " + folds); System.out.println("Seed: " + 1); System.out.println(); System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false)); } } catch (Exception ex) { Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex); } return eval; }
From source file:cezeri.evaluater.FactoryEvaluation.java
private static Evaluation doTest(boolean isTrained, Classifier model, Instances train, Instances test, boolean show_text, boolean show_plot, TFigureAttribute attr) { Instances data = new Instances(train); Random rand = new Random(1); data.randomize(rand); Evaluation eval = null;/*from ww w . j a va 2 s . co m*/ try { // double[] simulated = null; eval = new Evaluation(train); if (isTrained) { simulated = eval.evaluateModel(model, test); } else { Classifier clsCopy = Classifier.makeCopy(model); clsCopy.buildClassifier(train); simulated = eval.evaluateModel(clsCopy, test); } if (show_plot) { observed = test.attributeToDoubleArray(test.classIndex()); double[][] d = new double[2][simulated.length]; d[0] = observed; d[1] = simulated; CMatrix f1 = CMatrix.getInstance(d); String[] items = { "Observed", "Simulated" }; attr.items = items; attr.figureCaption = model.getClass().getCanonicalName(); f1.transpose().plot(attr); // if (attr.axis[0].isEmpty() && attr.axis[1].isEmpty()) { // f1.transpose().plot(attr); // } else { // f1.transpose().plot(model.getClass().getCanonicalName(), attr.items, attr.axis); // } } if (show_text) { System.out.println(); System.out.println("=== Setup for Test ==="); System.out.println( "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions())); System.out.println("Dataset: " + test.relationName()); System.out.println(); System.out.println(eval.toSummaryString("=== Test Results ===", false)); } } catch (Exception ex) { Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex); } return eval; }
From source file:cezeri.feature.selection.FeatureSelectionInfluence.java
public static Influence[] getMostDiscriminativeFeature(String filePath, Classifier model) { Influence[] ret = null;/*from w ww. jav a 2s. c o m*/ try { Instances data = DataSource.read(filePath); ret = new Influence[data.numAttributes() - 1]; data.setClassIndex(data.numAttributes() - 1); // other options int seed = 1; int folds = 10; // randomize data Instances randData = new Instances(data); Random rand = new Random(seed); randData.randomize(rand); Evaluation evalBase = getEvaluation(randData, model, folds); double accBase = evalBase.correct() / evalBase.numInstances() * 100; double nf = randData.numAttributes(); for (int j = 0; j < nf - 1; j++) { ret[j] = new Influence(); String str = randData.attribute(j).name(); Attribute att = randData.attribute(j); randData.deleteAttributeAt(j); Evaluation evalTemp = getEvaluation(randData, model, folds); double accTemp = evalTemp.correct() / evalTemp.numInstances() * 100; double tempInfluence = accBase - accTemp; ret[j].attributeName = str; ret[j].infVal = tempInfluence; randData.insertAttributeAt(att, j); } sortInfluenceArray(ret); } catch (Exception ex) { Logger.getLogger(FeatureSelectionInfluence.class.getName()).log(Level.SEVERE, null, ex); } return ret; }
From source file:com.mycompany.id3classifier.ID3Shell.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv"); Instances dataSet = source.getDataSet(); Discretize filter = new Discretize(); filter.setInputFormat(dataSet);//from w w w. j a v a 2s .c om dataSet = Filter.useFilter(dataSet, filter); Standardize standardize = new Standardize(); standardize.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardize); dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(9001)); //It's over 9000!! int folds = 10; //Perform crossvalidation Evaluation eval = new Evaluation(dataSet); for (int n = 0; n < folds; n++) { int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances trainingData = dataSet.trainCV(folds, n); Instances testData = dataSet.testCV(folds, n); ID3Classifier classifier = new ID3Classifier(); // Id3 classifier = new Id3(); classifier.buildClassifier(trainingData); eval.evaluateModel(classifier, testData); } System.out.println(eval.toSummaryString("\nResults:\n", false)); }
From source file:com.mycompany.knnclassifier.kNNShell.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource("carData.csv"); Instances dataSet = source.getDataSet(); Standardize standardize = new Standardize(); standardize.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardize); dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(9001)); //It's over 9000!! int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances trainingData = new Instances(dataSet, 0, trainingSize); Instances testData = new Instances(dataSet, trainingSize, testSize); kNNClassifier classifier = new kNNClassifier(3); classifier.buildClassifier(trainingData); //Used to compare to Weka's built in KNN algorithm //Classifier classifier = new IBk(1); //classifier.buildClassifier(trainingData); Evaluation eval = new Evaluation(trainingData); eval.evaluateModel(classifier, testData); System.out.println(eval.toSummaryString("\nResults:\n", false)); }
From source file:com.mycompany.neuralnetwork.NeuralNetworkShell.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource("irisData.csv"); Instances dataSet = source.getDataSet(); Standardize standardize = new Standardize(); standardize.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardize); dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(9001)); //It's over 9000!! int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances trainingData = new Instances(dataSet, 0, trainingSize); Instances testData = new Instances(dataSet, trainingSize, testSize); //MultilayerPerceptron classifier = new MultilayerPerceptron(); NeuralNetworkClassifier classifier = new NeuralNetworkClassifier(3, 20000, 0.1); classifier.buildClassifier(trainingData); Evaluation eval = new Evaluation(trainingData); eval.evaluateModel(classifier, testData); System.out.println(eval.toSummaryString("\nResults:\n", false)); }
From source file:com.reactivetechnologies.analytics.core.eval.BaggingWithBuiltClassifiers.java
License:Open Source License
/**
 * Bagging set-up for an ensemble whose members are supplied already built.
 *
 * <p>Compared with a regular bagging {@code buildClassifier}, the two steps that would train
 * the members are commented out (see the "Changed here" markers). The bags are still
 * sampled, so the random generator is consumed in the same order, but {@code bagData} is not
 * handed to any classifier. If out-of-bag calculation is enabled, the existing members in
 * {@code m_Classifiers} are used to compute {@code m_OutOfBagError}; otherwise it is set to 0.
 *
 * @param data training data; copied, and instances with a missing class are removed from the copy
 * @throws IllegalArgumentException if out-of-bag error is requested with a bag size other than 100%
 * @throws Exception if the capabilities test, resampling or a member's prediction fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class (on a copy, so the caller's data is left alone)
    data = new Instances(data);
    data.deleteWithMissingClass();

    /** Changed here: Use supplied classifier */
    //super.buildClassifier(data);
    /** End change */

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    int bagSize = (int) (data.numInstances() * (m_BagSizePercent / 100.0));
    Random random = new Random(m_Seed);

    // inBag[j][i] is read below to skip members for instance i; presumably it is filled in by
    // resampleWithWeights - confirm against the Weka API.
    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];

    for (int j = 0; j < m_Classifiers.length; j++) {
        Instances bagData = null;

        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];
            bagData = data.resampleWithWeights(random, inBag[j]);
        } else {
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                // shuffle, then keep only the first bagSize instances
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        // NOTE: bagData is not used past this point because the training step is disabled.
        /** Changed here: Use supplied classifier */
        /*if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }

        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);*/
        /** End change */
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            // one accumulator for a numeric class, one slot per class value otherwise
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                // only members whose inBag flag is false for this instance may vote
                if (inBag[j][i])
                    continue;

                voteCount++;
                // double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric) {
                    // votes[0] += pred;
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    // votes[(int) pred]++;
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                // normalize only when there is something to normalize (sum != 0)
                if (Utils.eq(Utils.sum(votes), 0)) {
                } else {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
            }

            // error for instance: every instance's weight is counted, even when no member voted
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }

        // weighted mean absolute error (numeric class) or weighted error rate (nominal class)
        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}