List of usage examples for weka.classifiers.rules ZeroR ZeroR
ZeroR
From source file:REPTree.java
License:Open Source License
/** * Builds classifier./*from ww w . j a v a2s . c o m*/ * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:REPRandomTree.java
License:Open Source License
/** * Builds classifier./*from w ww . j a va 2 s.com*/ * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth, m_FeatureFrac, random); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:MultiClassClassifier.java
License:Open Source License
/** * Builds the classifiers.//from w w w .j a va 2 s . c om * * @param insts the training data. * @throws Exception if a classifier can't be built */ public void buildClassifier(Instances insts) throws Exception { Instances newInsts; // can classifier handle the data? getCapabilities().testWithFail(insts); // remove instances with missing class insts = new Instances(insts); insts.deleteWithMissingClass(); if (m_Classifier == null) { throw new Exception("No base classifier has been set!"); } m_ZeroR = new ZeroR(); m_ZeroR.buildClassifier(insts); m_TwoClassDataset = null; int numClassifiers = insts.numClasses(); if (numClassifiers <= 2) { m_Classifiers = Classifier.makeCopies(m_Classifier, 1); m_Classifiers[0].buildClassifier(insts); m_ClassFilters = null; } else if (m_Method == METHOD_1_AGAINST_1) { // generate fastvector of pairs FastVector pairs = new FastVector(); for (int i = 0; i < insts.numClasses(); i++) { for (int j = 0; j < insts.numClasses(); j++) { if (j <= i) continue; int[] pair = new int[2]; pair[0] = i; pair[1] = j; pairs.addElement(pair); } } numClassifiers = pairs.size(); m_Classifiers = Classifier.makeCopies(m_Classifier, numClassifiers); m_ClassFilters = new Filter[numClassifiers]; m_SumOfWeights = new double[numClassifiers]; // generate the classifiers for (int i = 0; i < numClassifiers; i++) { RemoveWithValues classFilter = new RemoveWithValues(); classFilter.setAttributeIndex("" + (insts.classIndex() + 1)); classFilter.setModifyHeader(true); classFilter.setInvertSelection(true); classFilter.setNominalIndicesArr((int[]) pairs.elementAt(i)); Instances tempInstances = new Instances(insts, 0); tempInstances.setClassIndex(-1); classFilter.setInputFormat(tempInstances); newInsts = Filter.useFilter(insts, classFilter); if (newInsts.numInstances() > 0) { newInsts.setClassIndex(insts.classIndex()); m_Classifiers[i].buildClassifier(newInsts); m_ClassFilters[i] = classFilter; m_SumOfWeights[i] = newInsts.sumOfWeights(); } else { m_Classifiers[i] = null; m_ClassFilters[i] = null; } } // construct a two-class header version of the dataset m_TwoClassDataset = new Instances(insts, 0); int classIndex = m_TwoClassDataset.classIndex(); m_TwoClassDataset.setClassIndex(-1); m_TwoClassDataset.deleteAttributeAt(classIndex); FastVector classLabels = new FastVector(); classLabels.addElement("class0"); classLabels.addElement("class1"); m_TwoClassDataset.insertAttributeAt(new Attribute("class", classLabels), classIndex); m_TwoClassDataset.setClassIndex(classIndex); } else { // use error correcting code style methods Code code = null; switch (m_Method) { case METHOD_ERROR_EXHAUSTIVE: code = new ExhaustiveCode(numClassifiers); break; case METHOD_ERROR_RANDOM: code = new RandomCode(numClassifiers, (int) (numClassifiers * m_RandomWidthFactor), insts); break; case METHOD_1_AGAINST_ALL: code = new StandardCode(numClassifiers); break; default: throw new Exception("Unrecognized correction code type"); } numClassifiers = code.size(); m_Classifiers = Classifier.makeCopies(m_Classifier, numClassifiers); m_ClassFilters = new MakeIndicator[numClassifiers]; for (int i = 0; i < m_Classifiers.length; i++) { m_ClassFilters[i] = new MakeIndicator(); MakeIndicator classFilter = (MakeIndicator) m_ClassFilters[i]; classFilter.setAttributeIndex("" + (insts.classIndex() + 1)); classFilter.setValueIndices(code.getIndices(i)); classFilter.setNumeric(false); classFilter.setInputFormat(insts); newInsts = Filter.useFilter(insts, m_ClassFilters[i]); m_Classifiers[i].buildClassifier(newInsts); } } m_ClassAttribute = insts.classAttribute(); }
From source file:WrapperSubset.java
License:Open Source License
protected void resetOptions() { m_trainInstances = null; m_Evaluation = null; m_BaseClassifier = new ZeroR(); m_folds = 5; m_seed = 1; m_threshold = 0.01; }
From source file:adams.opt.cso.AbstractClassifierBasedSimpleCatSwarmOptimization.java
License:Open Source License
/** * Returns the default classifier to use. * * @return the classifier */ protected Classifier getDefaultClassifier() { return new ZeroR(); }
From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java
License:Open Source License
/** * builds the classifier//from www . j av a 2 s. c o m * * @param data the training instances * @throws Exception if something goes wrong */ @Override public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // save original header (needed for clusters to classes output) m_OriginalHeader = data.stringFreeStructure(); // remove class attribute for clusterer Instances clusterData = new Instances(data); clusterData.setClassIndex(-1); clusterData.deleteAttributeAt(data.classIndex()); m_ClusteringHeader = clusterData.stringFreeStructure(); if (m_ClusteringHeader.numAttributes() == 0) { System.err.println("Data contains only class attribute, defaulting to ZeroR model."); m_ZeroR = new ZeroR(); m_ZeroR.buildClassifier(data); } else { m_ZeroR = null; // build clusterer m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer); m_ActualClusterer.buildClusterer(clusterData); if (!getLabelAllClusters()) { // determine classes-to-clusters mapping ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(m_ActualClusterer); eval.evaluateClusterer(clusterData); double[] clusterAssignments = eval.getClusterAssignments(); int[][] counts = new int[eval.getNumClusters()][m_OriginalHeader.numClasses()]; int[] clusterTotals = new int[eval.getNumClusters()]; double[] best = new double[eval.getNumClusters() + 1]; double[] current = new double[eval.getNumClusters() + 1]; for (int i = 0; i < data.numInstances(); i++) { Instance instance = data.instance(i); if (!instance.classIsMissing()) { counts[(int) clusterAssignments[i]][(int) instance.classValue()]++; clusterTotals[(int) clusterAssignments[i]]++; } } best[eval.getNumClusters()] = Double.MAX_VALUE; ClusterEvaluation.mapClasses(eval.getNumClusters(), 0, counts, clusterTotals, current, best, 0); m_ClustersToClasses = new double[best.length]; System.arraycopy(best, 0, m_ClustersToClasses, 0, best.length); } else { m_ClusterClassProbs = new double[m_ActualClusterer.numberOfClusters()][data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { Instance clusterInstance = clusterData.instance(i); Instance originalInstance = data.instance(i); if (!originalInstance.classIsMissing()) { double[] probs = m_ActualClusterer.distributionForInstance(clusterInstance); for (int j = 0; j < probs.length; j++) { m_ClusterClassProbs[j][(int) originalInstance.classValue()] += probs[j]; } } } for (int i = 0; i < m_ClusterClassProbs.length; i++) { Utils.normalize(m_ClusterClassProbs[i]); } } } }
From source file:de.ugoe.cs.cpdp.training.WekaTestAwareTraining.java
License:Apache License
@Override public void apply(Instances testdata, Instances traindata) { classifier = setupClassifier();// w ww . jav a 2 s . c om if (!(classifier instanceof ITestAwareClassifier)) { throw new RuntimeException( "classifier must implement the ITestAwareClassifier interface in order to be used as TestAwareTrainingStrategy"); } ((ITestAwareClassifier) classifier).setTestdata(testdata); try { if (classifier == null) { Console.traceln(Level.WARNING, String.format("classifier null!")); } classifier.buildClassifier(traindata); } catch (Exception e) { if (e.getMessage().contains("Not enough training instances with class labels")) { Console.traceln(Level.SEVERE, "failure due to lack of instances: " + e.getMessage()); Console.traceln(Level.SEVERE, "training ZeroR classifier instead"); classifier = new ZeroR(); try { classifier.buildClassifier(traindata); } catch (Exception e2) { throw new RuntimeException(e2); } } else { throw new RuntimeException(e); } } }
From source file:hero.unstable.util.classification.wekaClassifier.java
public wekaClassifier(String nameClassifier, String classifierOpt, int seed, int folds) throws Exception { String[] opts = classifierOpt.split(" "); this.seed = seed; this.folds = folds; // Create classifier if (nameClassifier.equals("AdaBoostM1")) { this.classifier = new AdaBoostM1(); } else if (nameClassifier.equals("J48")) { this.classifier = new AdaBoostM1(); } else if (nameClassifier.equals("RandomForest")) { this.classifier = new RandomForest(); } else if (nameClassifier.equals("Bayes")) { this.classifier = new BayesNet(); } else if (nameClassifier.equals("knn")) { this.classifier = new IBk(); } else if (nameClassifier.equals("ZeroR")) { this.classifier = new ZeroR(); } else if (nameClassifier.equals("NN")) { this.classifier = new MultilayerPerceptron(); } else {//from ww w. jav a 2s . c o m this.classifier = new ZeroR(); } this.nameClassifier = classifier.getClass().getName(); }
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFillerFactory.java
License:Open Source License
public static GapFiller getGapFiller(final Algo algo) throws Exception { final GapFiller tsgp; if (algo == Algo.EM_WITH_DISCR_TIME) tsgp = new GapFillerEM(true); else if (algo == Algo.EM) tsgp = new GapFillerEM(false); else if (algo == Algo.Interpolation) tsgp = new GapFillerInterpolation(false); else if (algo == Algo.ZeroR) tsgp = new GapFillerClassifier(false, new ZeroR()); else if (algo == Algo.REG_WITH_DISCR_TIME) tsgp = new GapFillerRegressions(true); else if (algo == Algo.REG) tsgp = new GapFillerRegressions(false); else if (algo == Algo.M5P_WITH_DISCR_TIME) tsgp = new GapFillerClassifier(true, new M5P()); else if (algo == Algo.M5P) tsgp = new GapFillerClassifier(false, new M5P()); else if (algo == Algo.ANN_WITH_DISCR_TIME) tsgp = new GapFillerClassifier(true, new MultilayerPerceptron()); else if (algo == Algo.ANN) tsgp = new GapFillerClassifier(false, new MultilayerPerceptron()); else if (algo == Algo.NEARESTNEIGHBOUR_WITH_DISCR_TIME) tsgp = new GapFillerClassifier(true, new IBk()); else if (algo == Algo.NEARESTNEIGHBOUR) tsgp = new GapFillerClassifier(false, new IBk()); else//from ww w . jav a 2 s . c o m throw new Exception("Algo not managed -> " + algo); return tsgp; }
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFillerFactory.java
License:Open Source License
public static GapFiller getGapFiller(final String algoname, final boolean useDiscretizedTime) throws Exception { final GapFiller tsgp; if (algoname.equals("EM")) tsgp = new GapFillerEM(useDiscretizedTime); else if (algoname.equals("Interpolation")) tsgp = new GapFillerInterpolation(useDiscretizedTime); else if (algoname.equals("ZeroR")) tsgp = new GapFillerClassifier(useDiscretizedTime, new ZeroR()); else if (algoname.equals("REG")) tsgp = new GapFillerRegressions(useDiscretizedTime); else if (algoname.equals("M5P")) tsgp = new GapFillerClassifier(useDiscretizedTime, new M5P()); else if (algoname.equals("ANN")) tsgp = new GapFillerClassifier(useDiscretizedTime, new MultilayerPerceptron()); else if (algoname.equals("NEARESTNEIGHBOUR")) tsgp = new GapFillerClassifier(useDiscretizedTime, new IBk()); else/* w w w .j a va2 s .co m*/ throw new Exception("Algo name not managed -> " + algoname); return tsgp; }