Example usage for weka.classifiers.meta RegressionByDiscretization setNumBins

Introduction

In this page you can find the example usage for weka.classifiers.meta RegressionByDiscretization setNumBins.

Prototype

public void setNumBins(int numBins)

Source Link

Document

Sets the number of bins to divide each selected numeric attribute into

Usage

From source file:com.openkm.kea.filter.KEAFilter.java

License:Open Source License

/**
 * Builds the classifier./* ww  w .j  av a2  s. c  om*/
 */
// aly: The main function, where everything important happens
private void buildClassifier() throws Exception {
    // Generate input format for classifier
    FastVector atts = new FastVector();
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (i == m_DocumentAtt) {
            atts.addElement(new Attribute("TFxIDF"));
            atts.addElement(new Attribute("First_occurrence"));
            if (m_KFused) {
                atts.addElement(new Attribute("Keyphrase_frequency"));
            }
            if (m_STDEVfeature) {
                atts.addElement(new Attribute("Standard_deviation"));
            }
            if (m_NODEfeature) {
                atts.addElement(new Attribute("Relations_number"));
            }
            if (m_LENGTHfeature) {
                atts.addElement(new Attribute("Phrase_length"));
            }
        } else if (i == m_KeyphrasesAtt) {
            FastVector vals = new FastVector(2);
            vals.addElement("False");
            vals.addElement("True");
            //atts.addElement(new Attribute("Keyphrase?", vals));
            atts.addElement(new Attribute("Keyphrase?"));
        }
    }
    m_ClassifierData = new Instances("ClassifierData", atts, 0);
    m_ClassifierData.setClassIndex(m_NumFeatures);

    if (m_Debug) {
        log.info("--- Converting instances for classifier");
    }
    // Convert pending input instances into data for classifier
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);

        // Get the key phrases for the document
        String keyphrases = current.stringValue(m_KeyphrasesAtt);
        HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        HashMap<String, Counter> hashKeysEval = getGivenKeyphrases(keyphrases, true);

        // Get the phrases for the document
        HashMap<String, FastVector> hash = new HashMap<String, FastVector>();
        int length = getPhrases(hash, current.stringValue(m_DocumentAtt));
        // hash = getComposits(hash);

        // Compute the feature values for each phrase and
        // add the instance to the data for the classifier

        Iterator<String> it = hash.keySet().iterator();
        while (it.hasNext()) {
            String phrase = it.next();
            FastVector phraseInfo = (FastVector) hash.get(phrase);

            double[] vals = featVals(phrase, phraseInfo, true, hashKeysEval, hashKeyphrases, length, hash);
            //log.info(vals);
            Instance inst = new Instance(current.weight(), vals);
            // .err.println(phrase + "\t" + inst.toString());
            m_ClassifierData.add(inst);
        }
    }

    if (m_Debug) {
        log.info("--- Building classifier");
    }

    // Build classifier

    // Uncomment if you want to use a different classifier
    // Caution: Other places in the code will have to be adjusted!!
    /*I. Naive Bayes:
     FilteredClassifier fclass = new FilteredClassifier();      
     fclass.setClassifier(new weka.classifiers.bayes.NaiveBayesSimple());
     fclass.setFilter(new Discretize());
     m_Classifier = fclass;
     */

    //NaiveBayes nb = new NaiveBayes();
    //nb.setUseSupervisedDiscretization(true);
    //m_Classifier = nb;

    /* II. Linear Regression:
     LinearRegression lr = new LinearRegression();   
     lr.setAttributeSelectionMethod(new 
     weka.core.SelectedTag(1, LinearRegression.TAGS_SELECTION));
     lr.setEliminateColinearAttributes(false);
     lr.setDebug(false);
             
     m_Classifier = lr;*/

    /* III. Bagging with REPTrees
     Bagging bagging = new Bagging();   
             
     String[] ops_bagging = {
     new String("-P"),
     new String("100"),
     new String("-S"), 
     new String("1"),
     new String("-I"), 
     new String("50")};
             
     */

    /*
     * REPTree rept = new REPTree();
     //results are worse!
      rept.setNoPruning(true);
      String[] ops_rept = {
      new String("-M"), 
      new String("2"),
      new String("-V"), 
      new String("0.0010"),            
      new String("-N"), 
      new String("3"),
      new String("-S"), 
      new String("1"),
      new String("-L"), 
      new String("1"),};
              
      rept.setOptions(ops_rept);
      bagging.setClassifier(rept);
      */

    //   bagging.setOptions(ops_bagging);
    //FilteredClassifier fclass = new FilteredClassifier();      
    //fclass.setClassifier(new REPTree());
    //fclass.setFilter(new Discretize());
    //bagging.setClassifier(fclass);
    //   m_Classifier = bagging;

    RegressionByDiscretization rvd = new RegressionByDiscretization();
    FilteredClassifier fclass = new FilteredClassifier();
    fclass.setClassifier(new weka.classifiers.bayes.NaiveBayesSimple());
    fclass.setFilter(new Discretize());

    rvd.setClassifier(fclass);
    rvd.setNumBins(m_Indexers + 1);
    m_Classifier = rvd;

    // log.info(m_ClassifierData);   
    //System.exit(1);
    m_Classifier.buildClassifier(m_ClassifierData);

    if (m_Debug) {
        log.info("" + m_Classifier);
    }

    // Save space
    m_ClassifierData = new Instances(m_ClassifierData, 0);
}