KFST.featureSelection.embedded.TreeBasedMethods.DecisionTreeBasedMethod.java Source code

Introduction

Here is the source code for KFST.featureSelection.embedded.TreeBasedMethods.DecisionTreeBasedMethod.java
Source

/*
 * Kurdistan Feature Selection Tool (KFST) is an open-source tool, developed
 * completely in Java, for performing feature selection process in different
 * areas of research.
 * For more information about KFST, please visit:
 *     http://kfst.uok.ac.ir/index.html
 *
 * Copyright (C) 2016-2018 KFST development team at University of Kurdistan,
 * Sanandaj, Iran.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package KFST.featureSelection.embedded.TreeBasedMethods;

import KFST.gui.featureSelection.embedded.decisionTreeBased.TreeType;
import KFST.util.ArraysFunc;
import KFST.util.FileFunc;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.logging.Level;
import java.util.logging.Logger;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.RandomTree;
import weka.core.Instances;

/**
 * Implements the decision tree based embedded feature selection methods.
 * <p>
 * A Weka decision tree (C4.5 via {@link J48}, or {@link RandomTree}) is built
 * on the train set, and the features that appear in the split nodes of the
 * textual description of the tree are reported as the selected feature
 * subset.
 *
 * @author Sina Tabakhi
 * @see KFST.featureSelection.embedded.TreeBasedMethods
 * @see KFST.featureSelection.embedded.EmbeddedApproach
 * @see KFST.featureSelection.FeatureSelection
 */
public class DecisionTreeBasedMethod extends TreeBasedMethods {

    private static final Logger LOGGER = Logger.getLogger(DecisionTreeBasedMethod.class.getName());

    /**
     * Comparison operators that separate a feature name from its split value
     * in a line of the printed tree. The order is the lookup priority: the
     * two-character operators are tried before the one-character ones.
     */
    private static final String[] SPLIT_OPERATORS = {" <= ", " >= ", " = ", " > ", " < "};

    /** C4.5: confidence factor used for pruning. */
    private double confidenceValue;
    /** C4.5: minimum number of samples per leaf. */
    private int minNumSampleInLeaf;
    /** Random tree: number of randomly chosen attributes. */
    private int randomTreeKValue;
    /** Random tree: maximum depth of the tree. */
    private int randomTreeMaxDepth;
    /** Random tree: minimum total weight of the instances in a leaf. */
    private double randomTreeMinNum;
    /** Random tree: minimum proportion of the total variance required for split. */
    private double randomTreeMinVarianceProp;

    /**
     * initializes the parameters
     *
     * @param arguments array of parameter contains
     * <p>
     * if the type of tree is C4.5 (<code>path</code>, <code>tree type</code>,
     * <code>confidenceValue</code>, <code>minNumSampleInLeaf</code>) in which
     * <code><b><i>path</i></b></code> is the path of the project,
     * <code><b><i>tree type</i></b></code> is the type of tree,
     * <code><b><i>confidenceValue</i></b></code> is the confidence factor used
     * for pruning, <code><b><i>minNumSampleInLeaf</i></b></code> is the minimum
     * number of samples per leaf
     *
     * <p>
     * if the type of tree is random tree (<code>path</code>,
     * <code>tree type</code>, <code>KValue</code>, <code>MaxDepth</code>,
     * <code>MinNum</code>, <code>MinVarianceProp</code>) in which
     * <code><b><i>path</i></b></code> is the path of the project,
     * <code><b><i>tree type</i></b></code> is the type of tree,
     * <code><b><i>KValue</i></b></code> is the number of randomly chosen
     * attributes, <code><b><i>MaxDepth</i></b></code> is the maximum depth of
     * the tree, <code><b><i>MinNum</i></b></code> is the minimum total weight
     * of the instances in a leaf, <code><b><i>MinVarianceProp</i></b></code> is
     * the minimum proportion of the total variance
     *
     * <p>
     * The real-valued parameters may be given as any {@link Number} (for
     * example an <code>Integer</code>); the integer-valued parameters must be
     * given as <code>Integer</code>.
     */
    public DecisionTreeBasedMethod(Object... arguments) {
        super(arguments);
        if (TREE_TYPE == TreeType.C45) {
            confidenceValue = toDouble(arguments[2]);
            minNumSampleInLeaf = (int) arguments[3];
        } else if (TREE_TYPE == TreeType.RANDOM_TREE) {
            randomTreeKValue = (int) arguments[2];
            randomTreeMaxDepth = (int) arguments[3];
            randomTreeMinNum = toDouble(arguments[4]);
            randomTreeMinVarianceProp = toDouble(arguments[5]);
        }
    }

    /**
     * initializes the parameters (C4.5 tree)
     *
     * @param path the path of the project
     * @param confidence the confidence factor used for pruning
     * @param minNum the minimum number of samples per leaf
     */
    public DecisionTreeBasedMethod(String path, double confidence, int minNum) {
        super(path, TreeType.C45);
        this.confidenceValue = confidence;
        this.minNumSampleInLeaf = minNum;
    }

    /**
     * initializes the parameters (random tree)
     *
     * @param path the path of the project
     * @param randomTreeKValue the number of randomly chosen attributes
     * @param randomTreeMaxDepth the maximum depth of the tree
     * @param randomTreeMinNum the minimum total weight of the instances in a
     * leaf
     * @param randomTreeMinVarianceProp the minimum proportion of the total
     * variance (over all the data) required for split
     */
    public DecisionTreeBasedMethod(String path, int randomTreeKValue, int randomTreeMaxDepth,
            double randomTreeMinNum, double randomTreeMinVarianceProp) {
        super(path, TreeType.RANDOM_TREE);
        this.randomTreeKValue = randomTreeKValue;
        this.randomTreeMaxDepth = randomTreeMaxDepth;
        this.randomTreeMinNum = randomTreeMinNum;
        this.randomTreeMinVarianceProp = randomTreeMinVarianceProp;
    }

    /**
     * converts a boxed numeric argument to a double value
     *
     * @param value the argument; must be an instance of {@link Number}
     *
     * @return the value of the argument as a double
     */
    private static double toDouble(Object value) {
        // Going through Number accepts Double as before and additionally
        // Integer, Float, etc., which a direct (double) cast would reject.
        return ((Number) value).doubleValue();
    }

    /**
     * extracts the name of the split feature from one line of the printed
     * tree
     *
     * @param line one line of the textual description of the tree
     *
     * @return the text that precedes the last occurrence of the first
     * matching operator of {@link #SPLIT_OPERATORS}, or an empty string if the
     * line contains none of the operators
     */
    private static String extractFeatureName(String line) {
        // Remove the "|   " markers that indent the nested nodes.
        String node = line.replace("|   ", " ").trim();
        for (String operator : SPLIT_OPERATORS) {
            int position = node.lastIndexOf(operator);
            if (position != -1) {
                return node.substring(0, position).trim();
            }
        }
        return "";
    }

    /**
     * find the feature subset from the nodes of the created tree (Used for C4.5
     * and Random Tree)
     * <p>
     * Lines that do not describe a split, and names that are not found in the
     * list of feature names, are ignored. Each feature is reported once.
     *
     * @param tree the generated tree based on the train set
     */
    @Override
    protected void selectedFeatureSubset(String tree) {
        ArrayList<Integer> featureSubset = new ArrayList<>();

        for (String line : tree.split("\n")) {
            String featureName = extractFeatureName(line);
            if (featureName.isEmpty()) {
                continue;
            }

            int index = Arrays.asList(nameFeatures).indexOf(featureName);
            // indexOf returns -1 for text that is not a known feature name;
            // such a value must not be stored as a feature index.
            if (index >= 0 && !featureSubset.contains(index)) {
                featureSubset.add(index);
            }
        }

        this.setNumSelectedFeature(featureSubset.size());

        for (int i = 0; i < numSelectedFeature; i++) {
            selectedFeatureSubset[i] = featureSubset.get(i);
        }

        ArraysFunc.sortArray1D(selectedFeatureSubset, false);
    }

    /**
     * {@inheritDoc }
     * <p>
     * Builds a {@link J48} or a {@link RandomTree} classifier according to the
     * type of tree and returns its textual description. If the classifier
     * cannot be built, the error is logged and an empty string is returned.
     */
    @Override
    protected String buildClassifier(Instances dataTrain) {
        try {
            if (TREE_TYPE == TreeType.C45) {
                J48 decisionTreeC45 = new J48();
                decisionTreeC45.setConfidenceFactor((float) confidenceValue);
                decisionTreeC45.setMinNumObj(minNumSampleInLeaf);
                decisionTreeC45.buildClassifier(dataTrain);
                return decisionTreeC45.toString();
            } else if (TREE_TYPE == TreeType.RANDOM_TREE) {
                RandomTree decisionTreeRandomTree = new RandomTree();
                decisionTreeRandomTree.setKValue(randomTreeKValue);
                decisionTreeRandomTree.setMaxDepth(randomTreeMaxDepth);
                decisionTreeRandomTree.setMinNum(randomTreeMinNum);
                decisionTreeRandomTree.setMinVarianceProp(randomTreeMinVarianceProp);
                decisionTreeRandomTree.buildClassifier(dataTrain);
                return decisionTreeRandomTree.toString();
            }
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        return "";
    }

    /**
     * starts the feature selection process by Decision Tree based methods
     * <p>
     * The train set is written to a temporary CSV file, converted to an ARFF
     * file and loaded as Weka instances whose last attribute is the class.
     * The tree built on these instances is then parsed to obtain the selected
     * feature subset. The temporary directory is deleted at the end, whether
     * or not the process succeeds.
     */
    @Override
    public void evaluateFeatures() {
        FileFunc.createDirectory(TEMP_PATH);
        try {
            String nameDataCSV = TEMP_PATH + "dataCSV.csv";
            String nameDataARFF = TEMP_PATH + "dataARFF.arff";

            FileFunc.createCSVFile(trainSet, originalFeatureSet(), nameDataCSV, nameFeatures, classLabel);
            FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, numFeatures, numFeatures, nameFeatures,
                    numClass, classLabel);

            try {
                Instances dataTrain;
                // try-with-resources closes the reader even if loading fails.
                try (BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF))) {
                    dataTrain = new Instances(readerTrain);
                }
                dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

                selectedFeatureSubset(buildClassifier(dataTrain));
            } catch (Exception ex) {
                LOGGER.log(Level.SEVERE, null, ex);
            }
        } finally {
            // Clean up the temporary files on every exit path.
            FileFunc.deleteDirectoryWithAllFiles(TEMP_PATH);
        }
    }
}