KFST.featureSelection.embedded.SVMBasedMethods.SVMBasedMethods.java Source code

Introduction

Here is the source code for KFST.featureSelection.embedded.SVMBasedMethods.SVMBasedMethods.java
Source

/*
 * Kurdistan Feature Selection Tool (KFST) is an open-source tool, developed
 * completely in Java, for performing feature selection process in different
 * areas of research.
 * For more information about KFST, please visit:
 *     http://kfst.uok.ac.ir/index.html
 *
 * Copyright (C) 2016-2018 KFST development team at University of Kurdistan,
 * Sanandaj, Iran.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package KFST.featureSelection.embedded.SVMBasedMethods;

import KFST.classifier.WekaSVMKernel;
import KFST.featureSelection.embedded.EmbeddedApproach;
import KFST.gui.classifier.svmClassifier.SVMKernelType;
import KFST.util.ArraysFunc;
import KFST.util.FileFunc;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import weka.classifiers.functions.SMO;
import weka.core.Instances;

/**
 * This abstract class contains the main methods and fields shared by all
 * SVM-based feature selection methods. It inherits from the
 * EmbeddedApproach class.
 *
 * @author Sina Tabakhi
 * @see KFST.featureSelection.embedded.EmbeddedApproach
 * @see KFST.featureSelection.FeatureSelection
 */
public abstract class SVMBasedMethods extends EmbeddedApproach {

    private static final Logger LOGGER = Logger.getLogger(SVMBasedMethods.class.getName());

    /**
     * Prefix of every temporary CSV/ARFF file written by this class
     * (<code>PROJECT_PATH + "Temp\\"</code>).
     * NOTE(review): the separator is a hard-coded backslash; confirm that the
     * rest of the project builds its temp paths the same way before changing it.
     */
    protected final String TEMP_PATH;
    /** The type of kernel used by every SVM trained by this class. */
    protected SVMKernelType kernelType;
    /** The complexity parameter C handed to the SVM. */
    protected double parameterC;
    /**
     * The distinct class labels found in the train set, in order of first
     * appearance. Filled by {@link #createClassLabel()}.
     */
    protected double[] classLabelInTrainSet;

    /**
     * initializes the parameters
     *
     * @param arguments array of parameters contains (<code>path</code>,
     * <code>kernelType</code>, <code>Parameter c</code>) in which
     * <code><b><i>path</i></b></code> is the path of the project,
     * <code><b><i>kernelType</i></b></code> is the type of kernel to use, and
     * <code><b><i>Parameter c</i></b></code> is the complexity parameter C
     * (any <code>Number</code>; it is converted to <code>double</code>)
     */
    public SVMBasedMethods(Object... arguments) {
        // Delegates to the typed constructor so both entry points share one
        // initialisation path.
        this((String) arguments[0],
                (SVMKernelType) arguments[1],
                ((Number) arguments[2]).doubleValue());
    }

    /**
     * initializes the parameters
     *
     * @param path the path of the project
     * @param kernelType the type of kernel to use
     * @param c the complexity parameter C
     */
    public SVMBasedMethods(String path, SVMKernelType kernelType, double c) {
        super(path);
        this.kernelType = kernelType;
        this.parameterC = c;
        TEMP_PATH = PROJECT_PATH + "Temp\\";
    }

    /**
     * creates an array of class labels available in the train set
     * <p>
     * The label of a sample is read from column <code>numFeatures</code> of its
     * row. Distinct labels are stored in order of first appearance in an array
     * of length <code>numClass</code>; if the train set contains fewer distinct
     * labels than <code>numClass</code>, the remaining entries stay 0.
     */
    public void createClassLabel() {
        ArrayList<Double> labels = new ArrayList<>();
        for (double[] sample : trainSet) {
            double label = sample[this.numFeatures];
            if (!labels.contains(label)) {
                labels.add(label);
            }
        }

        this.classLabelInTrainSet = new double[this.numClass];
        for (int i = 0; i < labels.size(); i++) {
            this.classLabelInTrainSet[i] = labels.get(i);
        }
    }

    /**
     * generates binary classifiers (SVM) using input data and based on selected
     * feature subset, and finally returns the weights of features.
     * One-Versus-One strategy is used to construct classifiers in multiclass
     * classification.
     * <p>
     * If writing, reading or training fails, the exception is logged and the
     * weights collected so far (all zeros if nothing was collected) are
     * returned.
     *
     * @param selectedFeature an array of indices of the selected feature subset
     *
     * @return an array of the weights of features, indexed as
     * <code>[i][j][feature]</code> for each pair of classes
     * <code>i &lt; j</code>; entries with <code>j &lt;= i</code> are left at 0
     */
    protected double[][][] buildSVM_OneAgainstOne(int[] selectedFeature) {
        String nameDataCSV = TEMP_PATH + "dataCSV.csv";
        String nameDataARFF = TEMP_PATH + "dataARFF.arff";
        double[][][] weights = new double[numClass][numClass][selectedFeature.length];

        FileFunc.createCSVFile(trainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
        FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
                nameFeatures, numClass, classLabel);

        try {
            SMO svm = trainSVM(nameDataARFF);

            // Fetch both tables once instead of once per pair of classes.
            double[][][] sparseWeights = svm.sparseWeights();
            int[][][] sparseIndices = svm.sparseIndices();

            for (int i = 0; i < numClass; i++) {
                for (int j = i + 1; j < numClass; j++) {
                    scatterSparseWeights(sparseWeights[i][j], sparseIndices[i][j], weights[i][j]);
                }
            }
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }

        return weights;
    }

    /**
     * generates binary classifiers (SVM) using input data and based on selected
     * feature subset, and finally returns the weights of features.
     * One-Versus-All strategy is used to construct classifiers in multiclass
     * classification.
     * <p>
     * For every class index a copy of the train set is relabelled (label 0 for
     * samples of <code>classLabelInTrainSet[indexClass]</code>, label 1 for all
     * others), written to its own temporary CSV/ARFF pair and used to train
     * one SVM. <code>classLabelInTrainSet</code> must therefore be populated
     * (see {@link #createClassLabel()}) before this method is called. A
     * failure for one class is logged and leaves that class's row at 0; the
     * remaining classes are still processed.
     *
     * @param selectedFeature an array of indices of the selected feature subset
     *
     * @return an array of the weights of features, indexed as
     * <code>[indexClass][feature]</code>
     */
    protected double[][] buildSVM_OneAgainstRest(int[] selectedFeature) {
        double[][] weights = new double[numClass][selectedFeature.length];
        String[] tempClassLabel = new String[] { "c1", "c2" };

        for (int indexClass = 0; indexClass < numClass; indexClass++) {
            // Work on a copy so the relabelling never touches the real train set.
            double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);
            String nameDataCSV = TEMP_PATH + "dataCSV" + indexClass + ".csv";
            String nameDataARFF = TEMP_PATH + "dataARFF" + indexClass + ".arff";

            for (double[] dataRow : copyTrainSet) {
                if (dataRow[numFeatures] == classLabelInTrainSet[indexClass]) {
                    dataRow[numFeatures] = 0;
                } else {
                    dataRow[numFeatures] = 1;
                }
            }

            FileFunc.createCSVFile(copyTrainSet, selectedFeature, nameDataCSV, nameFeatures, tempClassLabel);
            FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
                    nameFeatures, tempClassLabel.length, tempClassLabel);

            try {
                SMO svm = trainSVM(nameDataARFF);

                // The relabelled problem has two classes, so the single
                // classifier is read from position [0][1].
                scatterSparseWeights(svm.sparseWeights()[0][1], svm.sparseIndices()[0][1], weights[indexClass]);
            } catch (Exception ex) {
                LOGGER.log(Level.SEVERE, null, ex);
            }
        }

        return weights;
    }

    /**
     * loads the given ARFF file, marks its last attribute as the class
     * attribute and trains an SVM on it using the configured kernel type and
     * complexity parameter C.
     *
     * @param arffPath the path of the ARFF file to train on
     *
     * @return the trained classifier
     *
     * @throws Exception if the file cannot be read or the classifier cannot be
     * built
     */
    private SMO trainSVM(String arffPath) throws Exception {
        Instances dataTrain;
        // try-with-resources closes the reader even when parsing fails.
        try (BufferedReader readerTrain = new BufferedReader(new FileReader(arffPath))) {
            dataTrain = new Instances(readerTrain);
        }
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        SMO svm = new SMO();
        svm.setC(parameterC);
        svm.setKernel(WekaSVMKernel.parse(kernelType));
        svm.buildClassifier(dataTrain);
        return svm;
    }

    /**
     * copies sparse weights into a dense array: <code>values[k]</code> is
     * stored at position <code>indices[k]</code> of <code>target</code>.
     *
     * @param values the sparse weight values
     * @param indices the positions that the values belong to
     * @param target the dense array that receives the values
     */
    private static void scatterSparseWeights(double[] values, int[] indices, double[] target) {
        for (int k = 0; k < values.length; k++) {
            target[indices[k]] = values[k];
        }
    }
}