List of usage examples for the weka.classifiers.bayes.NaiveBayesSimple constructor NaiveBayesSimple()
NaiveBayesSimple
From source file: kea.KEAFilter.java
License:Open Source License
/** * Builds the classifier./*ww w . ja v a 2 s.c o m*/ */ private void buildClassifier() throws Exception { // Generate input format for classifier FastVector atts = new FastVector(); for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (i == m_DocumentAtt) { atts.addElement(new Attribute("TFxIDF")); atts.addElement(new Attribute("First_occurrence")); if (m_KFused) { atts.addElement(new Attribute("Keyphrase_frequency")); } } else if (i == m_KeyphrasesAtt) { FastVector vals = new FastVector(2); vals.addElement("False"); vals.addElement("True"); atts.addElement(new Attribute("Keyphrase?", vals)); } } m_ClassifierData = new Instances("ClassifierData", atts, 0); m_ClassifierData.setClassIndex(m_NumFeatures); if (m_Debug) { System.err.println("--- Converting instances for classifier"); } // Convert pending input instances into data for classifier for (int i = 0; i < getInputFormat().numInstances(); i++) { Instance current = getInputFormat().instance(i); // Get the key phrases for the document String keyphrases = current.stringValue(m_KeyphrasesAtt); HashMap hashKeyphrases = getGivenKeyphrases(keyphrases, false); HashMap hashKeysEval = getGivenKeyphrases(keyphrases, true); // Get the phrases for the document HashMap hash = new HashMap(); int length = getPhrases(hash, current.stringValue(m_DocumentAtt)); // Compute the feature values for each phrase and // add the instance to the data for the classifier Iterator it = hash.keySet().iterator(); while (it.hasNext()) { String phrase = (String) it.next(); FastVector phraseInfo = (FastVector) hash.get(phrase); double[] vals = featVals(phrase, phraseInfo, true, hashKeysEval, hashKeyphrases, length); Instance inst = new Instance(current.weight(), vals); m_ClassifierData.add(inst); } } if (m_Debug) { System.err.println("--- Building classifier"); } // Build classifier FilteredClassifier fclass = new FilteredClassifier(); fclass.setClassifier(new NaiveBayesSimple()); fclass.setFilter(new Discretize()); m_Classifier = 
fclass; m_Classifier.buildClassifier(m_ClassifierData); if (m_Debug) { System.err.println(m_Classifier); } // Save space m_ClassifierData = new Instances(m_ClassifierData, 0); }