mlda.util.Utils.java Source code

Java tutorial

Introduction

Here is the source code for mlda.util.Utils.java

Source

/*
 * This file is part of the MLDA.
 *
 * (c)  Jose Maria Moyano Murillo
 *      Eva Lucrecia Gibaja Galindo
 *      Sebastian Ventura Soto <sventura@uco.es>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

package mlda.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;

import mulan.data.MultiLabelInstances;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;

/**
 * Static helper routines used while computing multi-label dataset metrics:
 * normalized entropy, per-instance label counts, label frequencies and
 * imbalance ratios.
 *
 * @author Jose Maria Moyano Murillo
 */
public class Utils {

    /**
     * Computes the entropy of a distribution of counts, normalized by the
     * natural logarithm of the number of categories ({@code array.length}).
     *
     * <p>With {@code N} the sum of the counts and {@code k} the number of
     * categories, the value returned is
     * {@code (N ln N - sum(n_i ln n_i)) / (N ln k)}.</p>
     *
     * @param array Counts per category; values are expected to be non-negative
     * @return Normalized entropy, or 0 when the array holds fewer than two
     *         categories or all counts add up to zero
     */
    public static double entropy(int[] array) {
        // With a single category ln(k) is 0 and the normalization would be a
        // 0/0 division (NaN); such a distribution carries no uncertainty.
        if (array.length < 2) {
            return 0;
        }

        double weightedLogs = 0;
        // Accumulated as long so large counts cannot overflow the total.
        long total = 0;

        for (int count : array) {
            weightedLogs += lnFunc(count);
            total += count;
        }

        if (total == 0) {
            return 0;
        }

        return (lnFunc(total) - weightedLogs) / (total * Math.log(array.length));
    }

    /**
     * Computes {@code num * ln(num)}, treating values below 1e-6 as zero so
     * that the logarithm of zero is never evaluated.
     *
     * @param num Value to evaluate
     * @return {@code num * ln(num)}, or 0 when {@code num} is below 1e-6
     */
    private static double lnFunc(double num) {
        if (num < 1e-6) {
            return 0;
        }
        return num * Math.log(num);
    }

    /**
     * Get number of labels associated with each instance
     *
     * @param mlData Multi-label dataset
     * @return Array with the number of labels associated with each instance,
     *         in the same order as the instances of the dataset
     */
    public static int[] labelsForInstance(MultiLabelInstances mlData) {
        int nInstances = mlData.getNumInstances();
        int nLabels = mlData.getNumLabels();
        int[] labelIndices = mlData.getLabelIndices();
        Instances instances = mlData.getDataSet();

        int[] labelsForInstance = new int[nInstances];

        for (int i = 0; i < nInstances; i++) {
            Instance inst = instances.get(i);

            for (int j = 0; j < nLabels; j++) {
                // A label is considered active when its attribute value is 1.
                if (inst.value(labelIndices[j]) == 1) {
                    labelsForInstance[i]++;
                }
            }
        }

        return labelsForInstance;
    }

    /**
     * Get array of ImbalancedFeature with labels frequency
     *
     * @param dataset Multi-label dataset
     * @return Array of ImbalancedFeature with the labels frequency, one entry
     *         per label and in the order given by the label indices
     */
    public static ImbalancedFeature[] getAppearancesPerLabel(MultiLabelInstances dataset) {
        int[] labelIndices = dataset.getLabelIndices();
        Instances instances = dataset.getDataSet();

        ImbalancedFeature[] labels = new ImbalancedFeature[labelIndices.length];

        for (int i = 0; i < labelIndices.length; i++) {
            Attribute currentAtt = instances.attribute(labelIndices[i]);
            labels[i] = new ImbalancedFeature(currentAtt.name(), countAppearances(instances, currentAtt));
        }

        return labels;
    }

    /**
     * Get array of ImbalancedFeature in descending order of frequency
     *
     * <p>The input array is not modified. Labels with the same number of
     * appearances keep the relative order they had in the input.</p>
     *
     * @param labels Labels of the dataset as ImbalancedFeature objects
     * @return New array of ImbalancedFeature in descending order of frequency
     */
    public static ImbalancedFeature[] getSortedByFrequency(ImbalancedFeature[] labels) {
        ImbalancedFeature[] sorted = labels.clone();

        // The object sort is stable, so ties stay in input order.
        Arrays.sort(sorted, new Comparator<ImbalancedFeature>() {
            @Override
            public int compare(ImbalancedFeature first, ImbalancedFeature second) {
                return Integer.compare(second.getAppearances(), first.getAppearances());
            }
        });

        return sorted;
    }

    /**
     * Calculate IRs of the ImbalancedFeatures
     *
     * <p>For each label of the dataset the result holds:</p>
     * <ul>
     * <li>the inter-class IR: appearances of the most frequent label in
     * {@code labels} divided by the appearances of the label, or NaN when the
     * label never appears;</li>
     * <li>the intra-class IR: ratio between the larger and the smaller of the
     * counts of instances with and without the label, or 0 when one of the
     * counts is zero;</li>
     * <li>the variance of those two counts around half the number of
     * instances.</li>
     * </ul>
     *
     * @param dataset Multi-label dataset
     * @param labels Labels of the dataset as ImbalancedFeature objects
     * @return Array of ImbalancedFeature objects with calculated IR
     * @throws IllegalArgumentException if a label of the dataset has no entry
     *         with the same name in {@code labels}
     */
    public static ImbalancedFeature[] getImbalancedWithIR(MultiLabelInstances dataset, ImbalancedFeature[] labels) {
        int[] labelIndices = dataset.getLabelIndices();
        ImbalancedFeature[] labelsImbalanced = new ImbalancedFeature[labelIndices.length];

        if (labelIndices.length == 0) {
            // Nothing to compute for a dataset without labels.
            return labelsImbalanced;
        }

        Instances instances = dataset.getDataSet();
        int nInstances = instances.size();

        // Floating-point division: an integer quotient would truncate the mean
        // for an odd number of instances and skew every variance.
        double mean = dataset.getNumInstances() / 2.0;

        // Computed once, and as a real maximum, so the result does not depend
        // on the order in which the caller supplies the labels.
        int maxAppearance = 0;
        for (ImbalancedFeature label : labels) {
            maxAppearance = Math.max(maxAppearance, label.getAppearances());
        }

        for (int i = 0; i < labelIndices.length; i++) {
            Attribute current = instances.attribute(labelIndices[i]);

            int nOnes = countAppearances(instances, current);
            int nZeros = nInstances - nOnes;

            double irIntraClass;
            if (nZeros == 0 || nOnes == 0) {
                irIntraClass = 0;
            } else {
                irIntraClass = (double) Math.max(nZeros, nOnes) / Math.min(nZeros, nOnes);
            }

            double variance = (Math.pow(nZeros - mean, 2) + Math.pow(nOnes - mean, 2)) / 2;

            ImbalancedFeature currentLabel = getLabelByName(current.name(), labels);
            if (currentLabel == null) {
                throw new IllegalArgumentException(
                        "Label '" + current.name() + "' of the dataset is not present in the given labels");
            }

            int appearances = currentLabel.getAppearances();

            double irInterClass;
            if (appearances <= 0) {
                irInterClass = Double.NaN;
            } else {
                irInterClass = (double) maxAppearance / appearances;
            }

            labelsImbalanced[i] = new ImbalancedFeature(current.name(), appearances,
                    irInterClass, irIntraClass, variance);
        }

        return labelsImbalanced;
    }

    /**
     * Get an ImbalancedFeature with the label given by name
     *
     * @param labelname Name of the label
     * @param list Array of ImbalancedFeature
     * @return First ImbalancedFeature whose name equals the given one, or
     *         {@code null} if there is none
     */
    public static ImbalancedFeature getLabelByName(String labelname, ImbalancedFeature[] list) {
        for (ImbalancedFeature candidate : list) {
            if (labelname.equals(candidate.getName())) {
                return candidate;
            }
        }

        return null;
    }

    /**
     * Get the ImbalancedFeature with the highest number of appearances
     *
     * @param list List of ImbalancedFeature
     * @return Element of the list with the maximum number of appearances; the
     *         first one found when several share the maximum
     * @throws IndexOutOfBoundsException if the list is empty
     */
    public static ImbalancedFeature getMaxAppearance(ArrayList<ImbalancedFeature> list) {
        ImbalancedFeature max = list.get(0);

        for (ImbalancedFeature current : list) {
            // Strict comparison keeps the earliest element on ties.
            if (current.getAppearances() > max.getAppearances()) {
                max = current;
            }
        }

        return max;
    }

    /**
     * Counts the instances in which the given label attribute has value 1.
     *
     * @param instances Instances to inspect
     * @param label Attribute of the label to count
     * @return Number of instances where the label is active
     */
    private static int countAppearances(Instances instances, Attribute label) {
        int appearances = 0;

        for (int j = 0; j < instances.size(); j++) {
            if (instances.instance(j).value(label) == 1.0) {
                appearances++;
            }
        }

        return appearances;
    }

}