Usage examples for weka.core.Instances.numClasses()
public int numClasses()
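numClasses() returns the number of values of a nominal class attribute (or 1 for a numeric class); the class index must be set before calling it. A minimal sketch of a typical call, assuming a placeholder dataset file name:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumClassesDemo {
    public static void main(String[] args) throws Exception {
        // "iris.arff" is a placeholder; any dataset with a nominal class works
        Instances data = DataSource.read("iris.arff");
        // numClasses() requires the class index to be set
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Number of classes: " + data.numClasses());
        // The usual pattern in the examples below: size a per-class array
        double[] classCounts = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            classCounts[(int) data.instance(i).classValue()]++;
        }
    }
}

The examples that follow show this pattern inside real classifiers.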
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
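In this manually built tree, data.numClasses() sizes the class distribution computed for each subset during backfitting.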
/**
 * Recursively backfits data into the tree.
 *
 * @param data
 *            the data to work with
 * @param classProbs
 *            the class distribution
 * @throws Exception
 *             if generation fails
 */
protected void backfitData(Instances data, double[] classProbs) throws Exception {

    // Make leaf if there are no training instances
    if (data.numInstances() == 0) {
        m_Attribute = -1;
        m_ClassDistribution = null;
        m_Prop = null;
        return;
    }

    // Check if node doesn't contain enough instances or is pure
    // or maximum depth reached
    m_ClassDistribution = classProbs.clone();

    /*
     * if (Utils.sum(m_ClassDistribution) < 2 * m_MinNum ||
     *         Utils.eq(m_ClassDistribution[Utils.maxIndex(m_ClassDistribution)],
     *                 Utils.sum(m_ClassDistribution))) {
     *
     *     // Make leaf
     *     m_Attribute = -1;
     *     m_Prop = null;
     *     return;
     * }
     */

    // Are we at an inner node
    if (m_Attribute > -1) {

        // Compute new weights for subsets based on backfit data
        m_Prop = new double[m_Successors.length];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            if (!inst.isMissing(m_Attribute)) {
                if (data.attribute(m_Attribute).isNominal()) {
                    m_Prop[(int) inst.value(m_Attribute)] += inst.weight();
                } else {
                    m_Prop[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1] += inst.weight();
                }
            }
        }

        // If we only have missing values we can make this node into a leaf
        if (Utils.sum(m_Prop) <= 0) {
            m_Attribute = -1;
            m_Prop = null;
            return;
        }

        // Otherwise normalize the proportions
        Utils.normalize(m_Prop);

        // Split data
        Instances[] subsets = splitData(data);

        // Go through subsets
        for (int i = 0; i < subsets.length; i++) {

            // Compute distribution for current subset
            double[] dist = new double[data.numClasses()];
            for (int j = 0; j < subsets[i].numInstances(); j++) {
                dist[(int) subsets[i].instance(j).classValue()] += subsets[i].instance(j).weight();
            }

            // Backfit subset
            m_Successors[i].backfitData(subsets[i], dist);
        }

        // If unclassified instances are allowed, we don't need to store the
        // class distribution
        if (getAllowUnclassifiedInstances()) {
            m_ClassDistribution = null;
            return;
        }

        // Otherwise, if all successors are non-empty, we don't need to
        // store the class distribution
        for (int i = 0; i < subsets.length; i++) {
            if (m_Successors[i].m_ClassDistribution == null) {
                // An empty successor forces us to keep the distribution
                return;
            }
        }
        m_ClassDistribution = null;

        // If we have at least two non-empty successors, we should keep this
        // tree
        /*
         * int nonEmptySuccessors = 0;
         * for (int i = 0; i < subsets.length; i++) {
         *     if (m_Successors[i].m_ClassDistribution != null) {
         *         nonEmptySuccessors++;
         *         if (nonEmptySuccessors > 1) {
         *             return;
         *         }
         *     }
         * }
         *
         * // Otherwise, this node is a leaf or should become a leaf
         * m_Successors = null;
         * m_Attribute = -1;
         * m_Prop = null;
         * return;
         */
    }
}
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
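Here train.numClasses() sizes the initial class-count array before the tree is grown.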
/**
 * Builds the classifier.
 *
 * @param data
 *            the data to train with
 * @throws Exception
 *             if something goes wrong or the data doesn't fit
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // Make sure K value is in range
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    // Can the classifier handle the data?
    getCapabilities().testWithFail(data);

    // Remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Only class attribute? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    // Figure out appropriate datasets
    Instances train = null;
    Instances backfit = null;
    Random rand = data.getRandomNumberGenerator(m_randomSeed);
    if (m_NumFolds <= 0) {
        train = data;
    } else {
        data.randomize(rand);
        data.stratify(m_NumFolds);
        train = data.trainCV(m_NumFolds, 1, rand);
        backfit = data.testCV(m_NumFolds, 1);
    }

    // Set default instances for selection
    setRequiredInst(data);

    // Create the attribute indices window
    int[] attIndicesWindow = new int[data.numAttributes() - 1];
    int j = 0;
    for (int i = 0; i < attIndicesWindow.length; i++) {
        if (j == data.classIndex())
            j++; // do not include the class
        attIndicesWindow[i] = j++;
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        classProbs[(int) inst.classValue()] += inst.weight();
    }

    Instances requiredInstances = getRequiredInst();

    // Build tree
    if (jsontree != null) {
        buildTree(train, classProbs, new Instances(data, 0), m_Debug, 0, jsontree, 0,
                m_distributionData, requiredInstances, listOfFc, cSetList, ccSer, d);
    } else {
        System.out.println("No json tree specified, failing to process tree");
    }
    setRequiredInst(requiredInstances);

    // Backfit if required
    if (backfit != null) {
        backfitData(backfit);
    }
}
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
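data.numClasses() dimensions every candidate split's class-distribution arrays here, whether the split comes from a nominal attribute, a numeric attribute, an embedded classifier, or a custom set.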
/**
 * Computes the class distribution for an attribute.
 *
 * @param props
 * @param dists
 * @param att
 *            the attribute index
 * @param data
 *            the data to work with
 * @throws Exception
 *             if something goes wrong
 */
protected HashMap<String, Double> distribution(double[][] props, double[][][] dists, int att,
        Instances data, double givenSplitPoint, HashMap<String, Classifier> custom_classifiers)
        throws Exception {

    HashMap<String, Double> mp = new HashMap<String, Double>();
    double splitPoint = givenSplitPoint;
    double origSplitPoint = 0;
    Attribute attribute = null;
    double[][] dist = null;
    int indexOfFirstMissingValue = -1;
    String CustomClassifierId = null;
    CustomSet cSet = null;

    if (att >= data.numAttributes() && att < data.numAttributes() + custom_classifiers.size()) {
        CustomClassifierId = getKeyinMap(custom_classifiers, att, data);
    } else if (att >= data.numAttributes() + custom_classifiers.size()) {
        cSet = getReqCustomSet(att - (data.numAttributes() - 1 + custom_classifiers.size()), cSetList);
    } else {
        attribute = data.attribute(att);
    }

    if (CustomClassifierId == null && cSet == null) {
        if (attribute.isNominal()) {
            // For nominal attributes
            dist = new double[attribute.numValues()][data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance inst = data.instance(i);
                if (inst.isMissing(att)) {
                    // Skip missing values at this stage
                    if (indexOfFirstMissingValue < 0) {
                        indexOfFirstMissingValue = i;
                    }
                    continue;
                }
                dist[(int) inst.value(att)][(int) inst.classValue()] += inst.weight();
            }
        } else {
            // For numeric attributes
            double[][] currDist = new double[2][data.numClasses()];
            dist = new double[2][data.numClasses()];

            // Sort data
            data.sort(att);

            // Move all instances into second subset
            for (int j = 0; j < data.numInstances(); j++) {
                Instance inst = data.instance(j);
                if (inst.isMissing(att)) {
                    // Can stop as soon as we hit a missing value
                    indexOfFirstMissingValue = j;
                    break;
                }
                currDist[1][(int) inst.classValue()] += inst.weight();
            }

            // Value before splitting
            double priorVal = priorVal(currDist);

            // Save initial distribution
            for (int j = 0; j < currDist.length; j++) {
                System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
            }

            if (Double.isNaN(splitPoint)) {
                // Try all possible split points
                double currSplit = data.instance(0).value(att);
                double currVal, bestVal = -Double.MAX_VALUE;
                for (int i = 0; i < data.numInstances(); i++) {
                    Instance inst = data.instance(i);
                    if (inst.isMissing(att)) {
                        // Can stop as soon as we hit a missing value
                        break;
                    }

                    // Can we place a sensible split point here?
                    if (inst.value(att) > currSplit) {

                        // Compute gain for split point
                        currVal = gain(currDist, priorVal);

                        // Is the current split point the best point so far?
                        if (currVal > bestVal) {

                            // Store value of current point
                            bestVal = currVal;

                            // Save split point
                            splitPoint = (inst.value(att) + currSplit) / 2.0;
                            origSplitPoint = splitPoint;

                            // Save distribution
                            for (int j = 0; j < currDist.length; j++) {
                                System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
                            }
                        }
                    }
                    currSplit = inst.value(att);

                    // Shift over the weight
                    currDist[0][(int) inst.classValue()] += inst.weight();
                    currDist[1][(int) inst.classValue()] -= inst.weight();
                }
            } else {
                double currSplit = data.instance(0).value(att);
                double currVal, bestVal = -Double.MAX_VALUE;

                // Split the data set using the given split point
                for (int i = 0; i < data.numInstances(); i++) {
                    Instance inst = data.instance(i);
                    if (inst.isMissing(att)) {
                        // Can stop as soon as we hit a missing value
                        break;
                    }
                    if (inst.value(att) > currSplit) {

                        // Compute gain for split point
                        currVal = gain(currDist, priorVal);

                        // Is the current split point the best point so far?
                        if (currVal > bestVal) {

                            // Store value of current point
                            bestVal = currVal;

                            // Save computed split point
                            origSplitPoint = (inst.value(att) + currSplit) / 2.0;
                        }
                    }
                    currSplit = inst.value(att);

                    // Shift over the weight
                    currDist[0][(int) inst.classValue()] += inst.weight();
                    currDist[1][(int) inst.classValue()] -= inst.weight();

                    if (inst.value(att) <= splitPoint) {
                        // Save distribution since split point is specified
                        for (int j = 0; j < currDist.length; j++) {
                            System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
                        }
                    }
                }
            }
        }
    } else if (CustomClassifierId != null) {
        Classifier fc = custom_classifiers.get(CustomClassifierId);
        dist = new double[data.numClasses()][data.numClasses()];
        Instance inst;
        for (int i = 0; i < data.numInstances(); i++) {
            inst = data.instance(i);
            double predictedClass = fc.classifyInstance(inst);
            if (predictedClass != Instance.missingValue()) {
                dist[(int) predictedClass][(int) inst.classValue()] += inst.weight();
            }
        }
    } else if (cSet != null) {
        dist = new double[data.numClasses()][data.numClasses()];
        JsonNode vertices = mapper.readTree(cSet.getConstraints());
        ArrayList<double[]> attrVertices = generateVerticesList(vertices);
        List<Attribute> aList = generateAttributeList(cSet, data, d);
        double[] testPoint = new double[2];
        int ctr = 0;
        for (int k = 0; k < data.numInstances(); k++) {
            testPoint = new double[2];
            ctr = 0;
            for (Attribute a : aList) {
                if (!data.instance(k).isMissing(a)) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
            }
            int check = checkPointInPolygon(attrVertices, testPoint);
            dist[check][(int) data.instance(k).classValue()] += data.instance(k).weight();
        }
    }

    // Compute weights for subsets
    props[att] = new double[dist.length];
    for (int k = 0; k < props[att].length; k++) {
        props[att][k] = Utils.sum(dist[k]);
    }
    if (Utils.eq(Utils.sum(props[att]), 0)) {
        for (int k = 0; k < props[att].length; k++) {
            props[att][k] = 1.0 / props[att].length;
        }
    } else {
        Utils.normalize(props[att]);
    }

    // Any instances with missing values?
    if (indexOfFirstMissingValue > -1) {
        // Distribute weights for instances with missing values
        for (int i = indexOfFirstMissingValue; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            if (attribute.isNominal()) {
                // Need to check if attribute value is missing
                if (inst.isMissing(att)) {
                    for (int j = 0; j < dist.length; j++) {
                        dist[j][(int) inst.classValue()] += props[att][j] * inst.weight();
                    }
                }
            } else {
                // Can be sure that the value is missing, so no test required
                for (int j = 0; j < dist.length; j++) {
                    dist[j][(int) inst.classValue()] += props[att][j] * inst.weight();
                }
            }
        }
    }

    // Return distribution and split point
    dists[att] = dist;
    mp.put("split_point", splitPoint);
    mp.put("orig_split_point", origSplitPoint);
    return mp;
}
From source file:org.ssase.debt.classification.OnlineMultilayerPerceptron.java
License:Open Source License
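In this multilayer-perceptron variant, m_instances.numClasses() determines the number of output units and normalizes the per-instance training and validation error.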
/**
 * Returns default capabilities of the classifier.
 *
 * @return the capabilities of this classifier
 */
/*
 * public Capabilities getCapabilities() {
 *     Capabilities result = super.getCapabilities();
 *     result.disableAll();
 *
 *     // attributes
 *     result.enable(Capability.NOMINAL_ATTRIBUTES);
 *     result.enable(Capability.NUMERIC_ATTRIBUTES);
 *     result.enable(Capability.DATE_ATTRIBUTES);
 *     result.enable(Capability.MISSING_VALUES);
 *
 *     // class
 *     result.enable(Capability.NOMINAL_CLASS);
 *     result.enable(Capability.NUMERIC_CLASS);
 *     result.enable(Capability.DATE_CLASS);
 *     result.enable(Capability.MISSING_CLASS_VALUES);
 *
 *     return result;
 * }
 */
public void buildClassifier_old(Instances i) throws Exception {

    // Can the classifier handle the data?
    // getCapabilities().testWithFail(i);

    // Remove instances with missing class
    i = new Instances(i);
    i.deleteWithMissingClass();

    m_ZeroR = new weka.classifiers.rules.ZeroR();
    m_ZeroR.buildClassifier(i);

    // Only class attribute? -> use ZeroR model
    if (i.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_useDefaultModel = true;
        return;
    } else {
        m_useDefaultModel = false;
    }

    m_epoch = 0;
    m_error = 0;
    m_instances = null;
    m_currentInstance = null;
    m_controlPanel = null;
    m_nodePanel = null;
    m_outputs = new NeuralEnd[0];
    m_inputs = new NeuralEnd[0];
    m_numAttributes = 0;
    m_numClasses = 0;
    m_neuralNodes = new NeuralConnection[0];
    m_selected = new FastVector(4);
    m_graphers = new FastVector(2);
    m_nextId = 0;
    m_stopIt = true;
    m_stopped = true;
    m_accepted = false;
    m_instances = new Instances(i);
    m_random = new Random(m_randomSeed);
    m_instances.randomize(m_random);

    if (m_useNomToBin) {
        m_nominalToBinaryFilter = new NominalToBinary();
        m_nominalToBinaryFilter.setInputFormat(m_instances);
        m_instances = Filter.useFilter(m_instances, m_nominalToBinaryFilter);
    }

    m_numAttributes = m_instances.numAttributes() - 1;
    m_numClasses = m_instances.numClasses();
    setClassType(m_instances);

    // This sets up the validation set
    Instances valSet = null;
    // numInVal is needed later
    int numInVal = (int) (m_valSize / 100.0 * m_instances.numInstances());
    if (m_valSize > 0) {
        if (numInVal == 0) {
            numInVal = 1;
        }
        valSet = new Instances(m_instances, 0, numInVal);
    }

    setupInputs();
    setupOutputs();
    if (m_autoBuild) {
        setupHiddenLayer();
    }

    // This sets up the gui for usage
    if (m_gui) {
        m_win = new JFrame();
        m_win.addWindowListener(new WindowAdapter() {
            @Override
            public void windowClosing(WindowEvent e) {
                boolean k = m_stopIt;
                m_stopIt = true;
                int well = JOptionPane.showConfirmDialog(m_win,
                        "Are You Sure...\n" + "Click Yes To Accept" + " The Neural Network"
                                + "\n Click No To Return",
                        "Accept Neural Network", JOptionPane.YES_NO_OPTION);
                if (well == 0) {
                    m_win.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
                    m_accepted = true;
                    blocker(false);
                } else {
                    m_win.setDefaultCloseOperation(JFrame.DO_NOTHING_ON_CLOSE);
                }
                m_stopIt = k;
            }
        });

        m_win.getContentPane().setLayout(new BorderLayout());
        m_win.setTitle("Neural Network");
        m_nodePanel = new NodePanel();
        // Without the following two lines, the NodePanel.paintComponents(Graphics)
        // method will go berserk if the network doesn't fit completely: it will
        // get called on a constant basis, using 100% of the CPU.
        // See the following forum thread:
        // http://forum.java.sun.com/thread.jspa?threadID=580929&messageID=2945011
        m_nodePanel.setPreferredSize(new Dimension(640, 480));
        m_nodePanel.revalidate();

        JScrollPane sp = new JScrollPane(m_nodePanel, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS,
                JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
        m_controlPanel = new ControlPanel();
        m_win.getContentPane().add(sp, BorderLayout.CENTER);
        m_win.getContentPane().add(m_controlPanel, BorderLayout.SOUTH);
        m_win.setSize(640, 480);
        m_win.setVisible(true);
    }

    // This sets up the initial state of the gui
    if (m_gui) {
        blocker(true);
        m_controlPanel.m_changeEpochs.setEnabled(false);
        m_controlPanel.m_changeLearning.setEnabled(false);
        m_controlPanel.m_changeMomentum.setEnabled(false);
    }

    // For silly situations in which the network gets accepted before training
    // commences
    if (m_numeric) {
        setEndsToLinear();
    }
    if (m_accepted) {
        m_win.dispose();
        m_controlPanel = null;
        m_nodePanel = null;
        m_instances = new Instances(m_instances, 0);
        m_currentInstance = null;
        return;
    }

    // Connections done.
    double right = 0;
    double driftOff = 0;
    double lastRight = Double.POSITIVE_INFINITY;
    double bestError = Double.POSITIVE_INFINITY;
    double tempRate;
    double totalWeight = 0;
    double totalValWeight = 0;
    double origRate = m_learningRate; // only used for when reset

    // Ensure that at least 1 instance is trained through
    if (numInVal == m_instances.numInstances()) {
        numInVal--;
    }
    if (numInVal < 0) {
        numInVal = 0;
    }
    for (int noa = numInVal; noa < m_instances.numInstances(); noa++) {
        if (!m_instances.instance(noa).classIsMissing()) {
            totalWeight += m_instances.instance(noa).weight();
        }
    }
    if (m_valSize != 0) {
        for (int noa = 0; noa < valSet.numInstances(); noa++) {
            if (!valSet.instance(noa).classIsMissing()) {
                totalValWeight += valSet.instance(noa).weight();
            }
        }
    }
    m_stopped = false;

    for (int noa = 1; noa < m_numEpochs + 1; noa++) {
        right = 0;
        for (int nob = numInVal; nob < m_instances.numInstances(); nob++) {
            m_currentInstance = m_instances.instance(nob);
            if (!m_currentInstance.classIsMissing()) {
                // This is where the network updating (and training) occurs
                // for the training set
                resetNetwork();
                calculateOutputs();
                tempRate = m_learningRate * m_currentInstance.weight();
                if (m_decay) {
                    tempRate /= noa;
                }
                right += (calculateErrors() / m_instances.numClasses()) * m_currentInstance.weight();
                updateNetworkWeights(tempRate, m_momentum);
            }
        }
        right /= totalWeight;
        if (Double.isInfinite(right) || Double.isNaN(right)) {
            if (!m_reset) {
                m_instances = null;
                throw new Exception("Network cannot train. Try restarting with a"
                        + " smaller learning rate.");
            } else {
                // Reset the network if possible
                if (m_learningRate <= Utils.SMALL) {
                    throw new IllegalStateException("Learning rate got too small ("
                            + m_learningRate + " <= " + Utils.SMALL + ")!");
                }
                m_learningRate /= 2;
                buildClassifier(i);
                m_learningRate = origRate;
                m_instances = new Instances(m_instances, 0);
                m_currentInstance = null;
                return;
            }
        }

        // Do validation testing if applicable
        if (m_valSize != 0) {
            right = 0;
            for (int nob = 0; nob < valSet.numInstances(); nob++) {
                m_currentInstance = valSet.instance(nob);
                if (!m_currentInstance.classIsMissing()) {
                    // This is where the network updating occurs for the
                    // validation set
                    resetNetwork();
                    calculateOutputs();
                    right += (calculateErrors() / valSet.numClasses()) * m_currentInstance.weight();
                    // Note: 'right' could be calculated here just using the
                    // calculated output values. This would be faster, but
                    // less modular.
                }
            }
            if (right < lastRight) {
                if (right < bestError) {
                    bestError = right;
                    // Save the network weights at this point
                    for (int noc = 0; noc < m_numClasses; noc++) {
                        m_outputs[noc].saveWeights();
                    }
                    driftOff = 0;
                }
            } else {
                driftOff++;
            }
            lastRight = right;
            if (driftOff > m_driftThreshold || noa + 1 >= m_numEpochs) {
                for (int noc = 0; noc < m_numClasses; noc++) {
                    m_outputs[noc].restoreWeights();
                }
                m_accepted = true;
            }
            right /= totalValWeight;
        }
        m_epoch = noa;
        m_error = right;
        // Shows what the neural net is up to if a gui exists
        updateDisplay();

        // This junction controls what state the gui is in at the end of each
        // epoch, such as whether it is paused, resumable, etc.
        if (m_gui) {
            while ((m_stopIt || (m_epoch >= m_numEpochs && m_valSize == 0)) && !m_accepted) {
                m_stopIt = true;
                m_stopped = true;
                if (m_epoch >= m_numEpochs && m_valSize == 0) {
                    m_controlPanel.m_startStop.setEnabled(false);
                } else {
                    m_controlPanel.m_startStop.setEnabled(true);
                }
                m_controlPanel.m_startStop.setText("Start");
                m_controlPanel.m_startStop.setActionCommand("Start");
                m_controlPanel.m_changeEpochs.setEnabled(true);
                m_controlPanel.m_changeLearning.setEnabled(true);
                m_controlPanel.m_changeMomentum.setEnabled(true);
                blocker(true);
                if (m_numeric) {
                    setEndsToLinear();
                }
            }
            m_controlPanel.m_changeEpochs.setEnabled(false);
            m_controlPanel.m_changeLearning.setEnabled(false);
            m_controlPanel.m_changeMomentum.setEnabled(false);
            m_stopped = false;
            // If the network has been accepted, stop the training loop
            if (m_accepted) {
                m_win.dispose();
                m_controlPanel = null;
                m_nodePanel = null;
                m_instances = new Instances(m_instances, 0);
                m_currentInstance = null;
                return;
            }
        }
        if (m_accepted) {
            m_instances = new Instances(m_instances, 0);
            m_currentInstance = null;
            return;
        }
    }
    if (m_gui) {
        m_win.dispose();
        m_controlPanel = null;
        m_nodePanel = null;
    }
    m_instances = new Instances(m_instances, 0);
    m_currentInstance = null;
}
From source file:org.wkwk.classifier.MyC45.java
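In this C4.5-style tree builder, when no attribute yields information gain the leaf's class distribution is allocated with data.numClasses().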
public void makeTree(Instances data) throws Exception {
    if (data.numInstances() == 0) {
        // Nothing to split on: make this node an empty leaf and stop,
        // otherwise the code below would normalize an all-zero distribution
        splitAttribute = null;
        return;
    }

    // Calculate information gain for all attributes, except the class attribute
    double[] infoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes() - 1; i++) {
        Attribute m_attr = data.attribute(i);
        if (m_attr.isNominal()) {
            infoGains[i] = computeInfoGain(data, data.attribute(i));
        } else if (m_attr.isNumeric()) {
            infoGains[i] = computeInfoGainCont(data, data.attribute(i), bestThreshold(data, m_attr));
        }
    }
    splitAttribute = data.attribute(Utils.maxIndex(infoGains));
    if (splitAttribute.isNumeric()) {
        attrThreshold = bestThreshold(data, splitAttribute);
    }

    if (Utils.eq(infoGains[splitAttribute.index()], 0)) {
        // No information gain: make a leaf with the class distribution
        splitAttribute = null;
        classDistribution = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            int inst = (int) data.instance(i).value(data.classAttribute());
            classDistribution[inst]++;
        }
        Utils.normalize(classDistribution);
        classValue = Utils.maxIndex(classDistribution);
        classAttribute = data.classAttribute();
    } else {
        // Split the data and grow a subtree for each branch
        Instances[] splitData = null;
        if (splitAttribute.isNominal()) {
            splitData = splitData(data, splitAttribute);
        } else if (splitAttribute.isNumeric()) {
            splitData = splitDataCont(data, splitAttribute, attrThreshold);
        }
        if (splitAttribute.isNominal()) {
            successors = new MyC45[splitAttribute.numValues()];
            for (int i = 0; i < splitAttribute.numValues(); i++) {
                successors[i] = new MyC45();
                successors[i].makeTree(splitData[i]);
            }
        } else if (splitAttribute.isNumeric()) {
            successors = new MyC45[2];
            for (int i = 0; i < 2; i++) {
                successors[i] = new MyC45();
                successors[i].makeTree(splitData[i]);
            }
        }
    }
    if (isPruned) {
        data = prune(data);
    }
}
From source file:org.wkwk.classifier.MyC45.java
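The entropy computation counts class occurrences in an array of length data.numClasses().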
public double computeEntropy(Instances data) {
    // Count the occurrences of each class
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }

    // Compute the entropy
    double entropy = 0;
    for (int i = 0; i < data.numClasses(); i++) {
        if (classCounts[i] > 0) {
            entropy -= classCounts[i] / data.numInstances()
                    * Utils.log2(classCounts[i] / data.numInstances());
        }
    }
    return entropy;
}
From source file:sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License:Open Source License
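This wrapper sizes its array of base classifiers from p_Instances.numClasses(): none for a single class, one for a binary problem, and one per class otherwise.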
@Override
public void buildClassifier(Instances p_Instances) throws Exception {
    Instances newInsts = null;
    if (this.m_Classifier == null) {
        throw new IllegalStateException("No base classifier has been set!");
    }
    this.m_ZeroR = new ZeroR();
    this.m_ZeroR.buildClassifier(p_Instances);
    this.m_ClassAttribute = p_Instances.classAttribute();
    this.getOutputFormat(p_Instances);
    int numClassifiers = p_Instances.numClasses();

    switch (numClassifiers) {
    case 1:
        this.m_Classifiers = null;
        break;
    case 2:
        // Binary problem: a single base classifier suffices
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, 1);
        newInsts = new Instances(this.m_OutputFormat, 0);
        for (int i = 0; i < p_Instances.numInstances(); i++) {
            Instance inst = this.filterInstance(p_Instances.instance(i));
            inst.setDataset(newInsts);
            newInsts.add(inst);
        }
        this.m_Classifiers[0].buildClassifier(newInsts);
        break;
    default:
        // Multi-class problem: one base classifier per class
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, numClassifiers);
        Hashtable<String, ArrayList<Double>> id2Classes = null;
        if (this.m_IndexOfID >= 0) {
            id2Classes = new Hashtable<String, ArrayList<Double>>();
            for (int i = 0; i < p_Instances.numInstances(); i++) {
                Instance inst = p_Instances.instance(i);
                String id = inst.stringValue(this.m_IndexOfID);
                if (!id2Classes.containsKey(id)) {
                    id2Classes.put(id, new ArrayList<Double>());
                }
                id2Classes.get(id).add(inst.classValue());
            }
        }
        for (int classIdx = 0; classIdx < this.m_Classifiers.length; classIdx++) {
            newInsts = this.genInstances(p_Instances, classIdx, id2Classes);
            this.m_Classifiers[classIdx].buildClassifier(newInsts);
        }
    }
}
From source file:smo2.SMO.java
License:Open Source License
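This one-against-one SMO wrapper creates one subset per class and a numClasses() x numClasses() matrix of binary classifiers.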
/**
 * Method for building the classifier. Implements a one-against-one wrapper
 * for multi-class problems.
 *
 * @param insts
 *            the set of training instances
 * @exception Exception
 *                if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        if (insts.checkForStringAttributes()) {
            throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
        }
        if (insts.classAttribute().isNumeric()) {
            throw new UnsupportedClassTypeException("mySMO can't handle a numeric class! Use "
                    + "SMOreg for performing regression.");
        }
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        if (insts.numInstances() == 0) {
            throw new Exception("No training instances without a missing class!");
        }

        /*
         * Removes all the instances with weight equal to 0. MUST be done
         * since condition (8) of Keerthi's paper is made with the assertion
         * Ci > 0 (see equation (3a)).
         */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing "
                    + "instances with either zero weight or a missing class!");
        }
        insts = data;
    }

    m_onlyNumeric = true;
    if (!m_checksTurnedOff) {
        for (int i = 0; i < insts.numAttributes(); i++) {
            if (i != insts.classIndex()) {
                if (!insts.attribute(i).isNumeric()) {
                    m_onlyNumeric = false;
                    break;
                }
            }
        }
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (!m_onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_NominalToBinary);
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarymySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarymySMO();
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
From source file:test.org.moa.opencl.IBk.java
License:Open Source License
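This k-nearest-neighbours classifier simply caches instances.numClasses() in m_NumClasses for later distribution estimates.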
/**
 * Generates the classifier.
 *
 * @param instances set of instances serving as training data
 * @throws Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances instances) throws Exception {

    // Can the classifier handle the data?
    getCapabilities().testWithFail(instances);

    // Remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    m_NumClasses = instances.numClasses();
    m_ClassType = instances.classAttribute().type();
    m_Train = new Instances(instances, 0, instances.numInstances());

    // Throw away initial instances until within the specified window size
    if ((m_WindowSize > 0) && (instances.numInstances() > m_WindowSize)) {
        m_Train = new Instances(m_Train, m_Train.numInstances() - m_WindowSize, m_WindowSize);
    }

    m_NumAttributesUsed = 0.0;
    for (int i = 0; i < m_Train.numAttributes(); i++) {
        if ((i != m_Train.classIndex())
                && (m_Train.attribute(i).isNominal() || m_Train.attribute(i).isNumeric())) {
            m_NumAttributesUsed += 1.0;
        }
    }

    m_NNSearch.setInstances(m_Train);

    // Invalidate any currently cross-validation selected k
    m_kNNValid = false;

    m_defaultModel = new ZeroR();
    m_defaultModel.buildClassifier(instances);
}
From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License:Open Source License
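Here data.numClasses() supplies the cluster count for SimpleKMeans so that cluster assignments can be matched against class labels.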
public Instances clusteredInstances(Instances data) {
    if (data == null) {
        throw new NullPointerException("Data is null at clusteredInstances method");
    }

    // Start with an empty copy of the dataset header; copying the reference
    // and removing instances from it would also mutate the input
    Instances sampled_data = new Instances(data, 0);

    SimpleKMeans sKmeans = new SimpleKMeans();
    data.setClassIndex(data.numAttributes() - 1);

    // Remove the class attribute before clustering
    Remove filter = new Remove();
    filter.setAttributeIndices("" + (data.classIndex() + 1));

    List<Integer> assignments = new ArrayList<Integer>();
    try {
        filter.setInputFormat(data);
        Instances dataClusterer = Filter.useFilter(data, filter);
        String[] options = new String[3];
        options[0] = "-I"; // max. iterations
        options[1] = "500";
        options[2] = "-O";
        sKmeans.setNumClusters(data.numClasses());
        sKmeans.setOptions(options);
        sKmeans.buildClusterer(dataClusterer);
        System.out.println("Kmeans\n:" + sKmeans);
        System.out.println(Arrays.toString(sKmeans.getAssignments()));
        // Box the int[] of cluster assignments into a List<Integer>
        // (Arrays.asList on an int[] would yield a single-element list)
        for (int assignment : sKmeans.getAssignments()) {
            assignments.add(assignment);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Assignments\n: " + assignments);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(sKmeans);
    try {
        eval.evaluateClusterer(data);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Keep only the instances whose cluster assignment matches the cluster
    // mapped to their class (iterating over all instances, not just the
    // first numClusters entries)
    int[] classesToClustersMap = eval.getClassesToClusters();
    for (int i = 0; i < data.numInstances(); i++) {
        if (assignments.get(i).equals(classesToClustersMap[(int) data.get(i).classValue()])) {
            sampled_data.add(data.get(i));
        }
    }
    return sampled_data;
}