keel.Algorithms.Neural_Networks.NNEP_Common.data.DoubleTransposedDataSet.java Source code

Java tutorial

Introduction

Here is the source code for keel.Algorithms.Neural_Networks.NNEP_Common.data.DoubleTransposedDataSet.java

Source

/***********************************************************************
    
   This file is part of KEEL-software, the Data Mining tool for regression, 
   classification, clustering, pattern mining and so on.
    
   Copyright (C) 2004-2010
       
   F. Herrera (herrera@decsai.ugr.es)
   L. Sánchez (luciano@uniovi.es)
   J. Alcalá-Fdez (jalcala@decsai.ugr.es)
   S. García (sglopez@ujaen.es)
   A. Fernández (alberto.fernandez@ujaen.es)
   J. Luengo (julianlm@decsai.ugr.es)
    
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
    
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
    
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see http://www.gnu.org/licenses/
      
**********************************************************************/

package keel.Algorithms.Neural_Networks.NNEP_Common.data;

import java.io.BufferedReader;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

import net.sf.jclec.IConfigure;

import org.apache.commons.configuration.Configuration;

/**
 * <p>
 * Set of data of a problem, stored as a transposed matrix of doubles.
 * </p>
 * <p>
 * Each row of the internal matrix holds every observed value of one variable
 * and each column holds one observation. The rows of the input variables come
 * first (rows {@code 0 .. nofinputs-1}), followed by the rows of the output
 * variables (rows {@code nofinputs .. nofvariables-1}).
 * </p>
 * <p>
 * The three dimensions ({@code nofobservations}, {@code nofinputs} and
 * {@code nofoutputs}) start at {@code -1}, meaning "not set". The matrix is
 * (re)allocated, and therefore emptied, every time one of them is set while
 * the other two are already known.
 * </p>
 *
 * @author Written by Pedro Antonio Gutierrez Penna, Aaron Ruiz Mora (University of Cordoba) 17/07/2007
 * @version 0.1
 * @since JDK1.5
 */
public class DoubleTransposedDataSet implements IConfigure {

    /////////////////////////////////////////////////////////////////
    // --------------------------------------- Serialization constant
    /////////////////////////////////////////////////////////////////

    /** Generated by Eclipse */
    private static final long serialVersionUID = -7161371989002786655L;

    /////////////////////////////////////////////////////////////////
    // ---------------------------------------------------- Constants
    /////////////////////////////////////////////////////////////////

    /** Schema code that marks a column as an input variable */
    private static final byte SCHEMA_INPUT = 1;

    /** Schema code that marks a column as an output variable */
    private static final byte SCHEMA_OUTPUT = 2;

    /////////////////////////////////////////////////////////////////
    // --------------------------------------------------- Attributes
    /////////////////////////////////////////////////////////////////

    /** Name of the file to extract the observations from */
    String fileName;

    /** Number of observations (Matrix columns); -1 while not set */
    protected int nofobservations = -1;

    /** Number of variables (Matrix rows); nofvariables = nofinputs+nofoutputs */
    protected int nofvariables = -1;

    /** Number of inputs; -1 while not set */
    protected int nofinputs = -1;

    /** Number of outputs; -1 while not set */
    protected int nofoutputs = -1;

    /** Array with all data, indexed as array[variable][observation] */
    protected double[][] array;

    /** Array with the mean of each output (filled by calculateMeans) */
    protected double[] outputMeans;

    /** Maximum distance between data (filled by calculateMeans) */
    protected double maximumDistance;

    /////////////////////////////////////////////////////////////////
    // -------------------------------------------------- Constructor
    /////////////////////////////////////////////////////////////////

    /**
     * Empty constructor. Leaves every dimension unset (-1) and the data
     * matrix unallocated.
     */
    public DoubleTransposedDataSet() {
        super();
    }

    /////////////////////////////////////////////////////////////////
    // ------------------------------- Getting and setting attributes
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Returns the filename used to read the observations and parameters
     * </p>
     * @return String Filename
     */
    public String getFileName() {
        return fileName;
    }

    /**
     * <p>
     * Sets the filename used to read the observations and parameters
     * </p>
     * @param fileName Filename
     */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /**
     * <p>
     * Returns the number of inputs of the observations stored in the data set
     * </p>
     * @return int Number of inputs
     */
    public int getNofinputs() {
        return nofinputs;
    }

    /**
     * <p>
     * Sets the number of inputs of the observations stored in the data set.
     * If the number of outputs and of observations are already set, the data
     * matrix is reallocated and any stored data is discarded.
     * </p>
     * @param nofinputs New number of inputs
     */
    public void setNofinputs(int nofinputs) {
        this.nofinputs = nofinputs;
        if (nofoutputs != -1 && nofobservations != -1) {
            init();
        }
    }

    /**
     * <p>
     * Returns the number of observations stored in the data set
     * </p>
     * @return int Number of observations
     */
    public int getNofobservations() {
        return nofobservations;
    }

    /**
     * <p>
     * Sets the number of observations stored in the data set.
     * If the number of inputs and of outputs are already set, the data
     * matrix is reallocated and any stored data is discarded.
     * </p>
     * @param nofobservations New number of observations
     */
    public void setNofobservations(int nofobservations) {
        this.nofobservations = nofobservations;
        if (nofoutputs != -1 && nofinputs != -1) {
            init();
        }
    }

    /**
     * <p>
     * Returns the number of variables stored in the data set
     * </p>
     * @return int Number of variables
     */
    public int getNofvariables() {
        return nofvariables;
    }

    /**
     * <p>
     * Sets the number of variables stored in the data set. This only updates
     * the counter; the data matrix is not resized.
     * </p>
     * @param nofvariables New number of variables
     */
    public void setNofvariables(int nofvariables) {
        this.nofvariables = nofvariables;
    }

    /**
     * <p>
     * Returns the number of outputs of the observations stored in the data set
     * </p>
     * @return int Number of outputs
     */
    public int getNofoutputs() {
        return nofoutputs;
    }

    /**
     * <p>
     * Sets the number of outputs of the observations stored in the data set.
     * If the number of inputs and of observations are already set, the data
     * matrix is reallocated and any stored data is discarded.
     * </p>
     * @param nofoutputs New number of outputs
     */
    public void setNofoutputs(int nofoutputs) {
        this.nofoutputs = nofoutputs;
        if (nofinputs != -1 && nofobservations != -1) {
            init();
        }
    }

    /**
     * <p>
     * Returns an specified observation (inputs followed by outputs) as a
     * newly allocated array
     * </p>
     * @param nofobservation Number of observation to return
     * @return double [] Array with the specified observation
     */
    public double[] getObservation(int nofobservation) {
        double[] observation = new double[nofvariables];
        for (int i = 0; i < nofvariables; i++) {
            observation[i] = array[i][nofobservation];
        }
        return observation;
    }

    /**
     * <p>
     * Sets an specified observation; the first nofvariables values of the
     * given array are copied into the data set
     * </p>
     * @param nofobservation Number of observation
     * @param observation New observation
     */
    public void setObservation(int nofobservation, double[] observation) {
        for (int i = 0; i < nofvariables; i++) {
            array[i][nofobservation] = observation[i];
        }
    }

    /**
     * <p>
     * Returns the outputs of an specified observation as a newly allocated
     * array
     * </p>
     * @param nofobservation Number of the observation
     * @return double [] Array with the outputs of the observation
     */
    public double[] getOutputs(int nofobservation) {
        double[] outputs = new double[nofoutputs];
        for (int i = nofinputs; i < nofvariables; i++) {
            outputs[i - nofinputs] = array[i][nofobservation];
        }
        return outputs;
    }

    /**
     * <p>
     * Returns a matrix with all the outputs of the dataSet
     * in rows. The rows are the internal arrays of the data set, not
     * copies, so changes made through them are visible in the data set.
     * </p>
     * @return double [][] Matrix with all the outputs of the dataSet
     */
    public double[][] getAllOutputs() {
        // Only the outer array is allocated: every row is a reference to
        // an internal row, so allocating the inner arrays would be wasted.
        double[][] outputs = new double[nofoutputs][];

        for (int i = nofinputs; i < nofvariables; i++) {
            outputs[i - nofinputs] = array[i];
        }

        return outputs;
    }

    /**
     * <p>
     * Returns the inputs of an specified observation as a newly allocated
     * array
     * </p>
     * @param nofobservation Number of the observation
     * @return double [] Array with the inputs of the observation
     */
    public double[] getInputs(int nofobservation) {
        double[] inputs = new double[nofinputs];
        for (int i = 0; i < nofinputs; i++) {
            inputs[i] = array[i][nofobservation];
        }
        return inputs;
    }

    /**
     * <p>
     * Returns a matrix with all the inputs of the dataSet
     * in rows. The rows are the internal arrays of the data set, not
     * copies, so changes made through them are visible in the data set.
     * </p>
     * @return double [][] Matrix with all the inputs of the dataSet
     */
    public double[][] getAllInputs() {
        // Only the outer array is allocated: every row is a reference to
        // an internal row, so allocating the inner arrays would be wasted.
        double[][] inputs = new double[nofinputs][];

        for (int i = 0; i < nofinputs; i++) {
            inputs[i] = array[i];
        }

        return inputs;
    }

    /**
     * <p>
     * Returns all the values of an output in the data set. The returned
     * array is the internal row, not a copy.
     * </p>
     * @param nofoutput Number of the output
     * @return double [] Array with all the values of the output
     */
    public double[] getOutput(int nofoutput) {
        return array[nofinputs + nofoutput];
    }

    /**
     * <p>
     * Sets all the values of an output in the data set. The given array is
     * stored by reference, not copied.
     * </p>
     * @param nofoutput Number of the output
     * @param values Double array with all the values of the output
     */
    public void setOutput(int nofoutput, double[] values) {
        array[nofinputs + nofoutput] = values;
    }

    /**
     * <p>
     * Returns all the values of a variable in the data set. The returned
     * array is the internal row, not a copy.
     * </p>
     * @param nofvariable Number of the variable
     * @return double [] Array with all the values of the variable
     */
    public double[] getObservationsOf(int nofvariable) {
        return array[nofvariable];
    }

    /**
     * <p>
     * Sets all the values of a variable in the data set. The given array is
     * stored by reference, not copied.
     * </p>
     * @param nofvariable Number of the variable
     * @param values Double array with all the values of the variable
     */
    public void setObservationsOf(int nofvariable, double[] values) {
        array[nofvariable] = values;
    }

    /**
     * <p>
     * Returns the mean of a specific number of output, as last computed by
     * {@link #calculateMeans()}
     * </p>
     * @param index Number of output mean to return
     * @return double Output mean
     */
    public double getOutputMean(int index) {
        return outputMeans[index];
    }

    /**
     * <p>
     * Returns the maximum value of a specific variable
     * </p>
     * @param index Number of variable maximum value to return
     * @return double Maximum value
     */
    public double getMaxValueOf(int index) {
        double[] row = array[index];
        double max = row[0];

        // Element 0 is the initial candidate, so the scan starts at 1
        for (int i = 1; i < nofobservations; i++) {
            if (row[i] > max) {
                max = row[i];
            }
        }

        return max;
    }

    /**
     * <p>
     * Returns the minimum value of a specific variable
     * </p>
     * @param index Number of variable minimum value to return
     * @return double Minimum value
     */
    public double getMinValueOf(int index) {
        double[] row = array[index];
        double min = row[0];

        // Element 0 is the initial candidate, so the scan starts at 1
        for (int i = 1; i < nofobservations; i++) {
            if (row[i] < min) {
                min = row[i];
            }
        }

        return min;
    }

    /**
     * <p>
     * Returns the maximum distance between train data
     * </p>
     * @return double Maximum distance
     */
    public double getMaximumDistance() {
        return maximumDistance;
    }

    /**
     * <p>
     * Sets the maximum distance between train data
     * </p>
     * @param maximumDistance New maximum distance
     */
    public void setMaximumDistance(double maximumDistance) {
        this.maximumDistance = maximumDistance;
    }

    /////////////////////////////////////////////////////////////////
    // ----------------------------------------------- Public methods
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Init the DoubleTransposedDataSet using a normal IDataset.
     * </p>
     * <p>
     * The schema holds one code per attribute: 1 marks an input, 2 marks an
     * output and any other code makes the attribute be skipped. Categorical
     * attributes are expanded into one 0/1 variable per category, except
     * two-category inputs, which use a single variable. Category values
     * are compared against 1..numberCategories (presumably the encoding
     * used by IDataset; verify against its implementation).
     * Instances containing a NaN value in any schema column are dropped,
     * and the number of observations is reduced accordingly.
     * The dataset is closed even when reading fails. Output means and the
     * maximum distance are recomputed at the end.
     * </p>
     * @throws DatasetException
     * @param schema Schema of the dataset
     * @param dataset Dataset to read data of
     */
    public void read(byte[] schema, IDataset dataset) throws DatasetException {

        //Open dataset
        dataset.open();

        try {
            //Reads number of observations
            setNofobservations(dataset.numberOfInstances());

            //Reads number of inputs and outputs
            int nOfInputs = 0;
            int nOfOutputs = 0;

            for (int i = 0; i < schema.length; i++) {
                if (schema[i] == SCHEMA_INPUT) {
                    nOfInputs++;
                } else if (schema[i] == SCHEMA_OUTPUT) {
                    nOfOutputs++;
                }
            }

            //Metadata
            IMetadata metadata = dataset.getMetadata();

            //Transform categorical attributes: each one was counted once
            //above, so only the extra variables are added here
            for (int i = 0; i < metadata.numberOfAttributes(); i++) {
                if (metadata.getAttribute(i).getType() == AttributeType.Categorical) {
                    int width = encodedCategories((CategoricalAttribute) metadata.getAttribute(i), schema[i]);
                    if (schema[i] == SCHEMA_INPUT) {
                        nOfInputs += (width - 1);
                    } else if (schema[i] == SCHEMA_OUTPUT) {
                        nOfOutputs += (width - 1);
                    }
                }
            }

            //Sets number of outputs and inputs (allocates the matrix)
            setNofinputs(nOfInputs);
            setNofoutputs(nOfOutputs);

            //For each instance
            int instanceCounter = 0;
            while (dataset.next()) {
                IDataset.IInstance instance = dataset.read();
                boolean lostValues = false;
                int inputCounter = 0;
                int outputCounter = 0;

                for (int i = 0; i < schema.length; i++) {
                    double value = instance.getValue(i);
                    if (Double.isNaN(value)) {
                        lostValues = true;
                    }

                    if (metadata.getAttribute(i).getType() != AttributeType.Categorical) {
                        if (schema[i] == SCHEMA_INPUT) {
                            array[inputCounter++][instanceCounter] = value;
                        } else if (schema[i] == SCHEMA_OUTPUT) {
                            array[nOfInputs + (outputCounter++)][instanceCounter] = value;
                        }
                    } else {
                        int width = encodedCategories((CategoricalAttribute) metadata.getAttribute(i), schema[i]);
                        if (schema[i] == SCHEMA_INPUT) {
                            for (int j = 1; j <= width; j++) {
                                array[inputCounter++][instanceCounter] = ((value == j) ? 1 : 0);
                            }
                        } else if (schema[i] == SCHEMA_OUTPUT) {
                            for (int j = 1; j <= width; j++) {
                                array[nOfInputs + (outputCounter++)][instanceCounter] = ((value == j) ? 1 : 0);
                            }
                        }
                    }
                }

                //An instance with lost values does not advance the column,
                //so the next instance overwrites it
                if (!lostValues) {
                    instanceCounter++;
                }
            }

            //Shrink the matrix if some instances were dropped
            if (instanceCounter < nofobservations) {
                nofobservations = instanceCounter;

                double[][] auxArray = array;
                array = new double[nofvariables][nofobservations];
                for (int i = 0; i < array.length; i++) {
                    System.arraycopy(auxArray[i], 0, array[i], 0, array[i].length);
                }
            }
        } finally {
            //Release the dataset even if reading failed
            dataset.close();
        }

        calculateMeans();
    }

    /**
     * <p>
     * Init the array stored in the DataSet reading the text file named by
     * the fileName attribute.
     * </p>
     * <p>
     * Expected format (tokens separated by spaces and/or tabs):
     * </p>
     * <ul>
     * <li>First line: number of observations, number of inputs and number
     *     of outputs.</li>
     * <li>Second line: the schema, one code per data column: 1 for an input,
     *     2 for an output, any other number for a column to be ignored.
     *     The amount of 1s and 2s must match the numbers of inputs and
     *     outputs declared in the first line.</li>
     * <li>One line per observation, with at least as many tokens as the
     *     schema. Ignored columns are not parsed.</li>
     * </ul>
     * <p>
     * The file is always closed before returning. Output means and the
     * maximum distance are recomputed at the end.
     * </p>
     * @throws IOException If the file cannot be opened, ends before all the
     *         declared lines have been read, or does not follow the format
     * @throws NumberFormatException If a required token is not a valid number
     */
    public void read() throws IOException, NumberFormatException {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(fileName));

            //------------------//
            //Reading first line//
            //------------------//
            String[] values = readTokens(reader);
            //Check the text format
            if (values.length < 3) {
                throw new IOException("Illegal Text Format");
            }
            //Reads number of observations
            setNofobservations(Integer.parseInt(values[0]));
            //Reads number of inputs
            setNofinputs(Integer.parseInt(values[1]));
            //Reads number of outputs
            setNofoutputs(Integer.parseInt(values[2]));

            //-------------------//
            //Reading second line//
            //-------------------//
            values = readTokens(reader);
            //Reads the input schema array, counting inputs and outputs
            byte[] iSchema = new byte[values.length];
            int schemaInputs = 0;
            int schemaOutputs = 0;
            for (int i = 0; i < iSchema.length; i++) {
                iSchema[i] = Byte.parseByte(values[i]);
                if (iSchema[i] == SCHEMA_INPUT) {
                    schemaInputs++;
                } else if (iSchema[i] == SCHEMA_OUTPUT) {
                    schemaOutputs++;
                }
            }
            //Check the text format: inputs and outputs are checked
            //separately because a schema with the right total but a
            //different split would make rows overwrite each other
            if (schemaInputs != nofinputs || schemaOutputs != nofoutputs) {
                throw new IOException("Illegal Text Format");
            }

            //-------------------//
            //Reading other lines//
            //-------------------//
            //For each observation
            for (int i = 0; i < nofobservations; i++) {
                //Read a line
                values = readTokens(reader);
                //Check the text format
                if (values.length < iSchema.length) {
                    throw new IOException("Illegal Text Format");
                }
                //Next input / output row to fill for this observation
                int inputRow = 0;
                int outputRow = nofinputs;
                //Read values
                for (int j = 0; j < iSchema.length; j++) {
                    if (iSchema[j] == SCHEMA_INPUT) {
                        array[inputRow++][i] = Double.parseDouble(values[j]);
                    } else if (iSchema[j] == SCHEMA_OUTPUT) {
                        array[outputRow++][i] = Double.parseDouble(values[j]);
                    }
                    //Otherwise the value is ignored (and not parsed)
                }
            }
        } catch (EOFException e) {
            System.out.println("Illegal Text Format");
            IOException formatError = new IOException("Illegal Text Format");
            formatError.initCause(e);
            throw formatError;
        } catch (NumberFormatException e) {
            System.out.println("Number format exception");
            throw e;
        } catch (FileNotFoundException e) {
            System.out.println("File not found");
            throw e;
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    //The stream was only read from: a failure while closing
                    //it cannot affect the data already loaded
                }
            }
        }

        calculateMeans();
    }

    /**
     * <p>
     * Obtain the means of all the outputs and, afterwards, the maximum
     * distance between the input data
     * </p>
     */
    public void calculateMeans() {
        //The buffer is reallocated when the number of outputs has changed
        //since the last call, so it never keeps a stale length
        if (outputMeans == null || outputMeans.length != nofoutputs) {
            outputMeans = new double[nofoutputs];
        }

        //Obtain the means of the outputs
        for (int j = 0; j < nofoutputs; j++) {
            double[] outputRow = array[nofinputs + j];
            double sum = 0;
            for (int i = 0; i < nofobservations; i++) {
                sum += outputRow[i];
            }
            outputMeans[j] = sum / nofobservations;
        }

        //Obtain the maximum distance between data
        obtainMaximumDistance();
    }

    /**
     * <p>
     * Returns a string representation of the DataSet: an XML-like text with
     * the dimensions and one line per observation
     * </p>
     * @return String Representation of the DataSet
     */
    @Override
    public String toString() {

        StringBuilder sb = new StringBuilder();
        sb.append("<DataSet>\n");
        sb.append("<nofobservations>").append(nofobservations).append("</nofobservations>\n");
        sb.append("<nofinputs>").append(nofinputs).append("</nofinputs>\n");
        sb.append("<nofoutputs>").append(nofoutputs).append("</nofoutputs>\n");
        sb.append("<observations>\n");
        for (int i = 0; i < nofobservations; i++) {
            for (int j = 0; j < nofvariables; j++) {
                sb.append(array[j][i]).append(" ");
            }
            sb.append("\n");
        }
        sb.append("</observations>\n");
        sb.append("</DataSet>");
        return sb.toString();
    }

    /**
     * <p>
     * Returns a copy of the DataSet. The data matrix is copied value by
     * value, and the output means and maximum distance of the copy are
     * recomputed from the copied data.
     * </p>
     * @return DataSet Copy of the DataSet
     */
    public DoubleTransposedDataSet copy() {
        DoubleTransposedDataSet result = new DoubleTransposedDataSet();
        //Set the fileName
        result.fileName = this.fileName;
        //Copy the dimensions; the last setter allocates the matrix
        result.setNofobservations(this.nofobservations);
        result.setNofinputs(this.nofinputs);
        result.setNofoutputs(this.nofoutputs);
        //Copy the array
        for (int i = 0; i < nofvariables; i++) {
            System.arraycopy(this.array[i], 0, result.array[i], 0, nofobservations);
        }
        //Calculate means
        result.calculateMeans();

        return result;
    }

    /**
     * <p>
     * Obtain a boolean array with true at these inputs that are constants,
     * that is, inputs that take the same value in every observation
     * </p>
     * @return boolean [] Constant inputs
     */
    public boolean[] obtainConstantsInputs() {
        boolean[] toRemove = new boolean[nofinputs];

        for (int i = 0; i < nofinputs; i++) {
            double[] inputRow = array[i];
            double value = inputRow[0];
            boolean constant = true;
            //Stop at the first observation that differs
            for (int j = 1; j < nofobservations && constant; j++) {
                constant = (inputRow[j] == value);
            }
            toRemove[i] = constant;
        }

        return toRemove;
    }

    /**
     * <p>
     * Remove the inputs desired. The remaining inputs keep their relative
     * order and the outputs are moved right after them. Output means and
     * maximum distance are not recomputed by this method.
     * </p>
     * @param toRemove Array of Boolean indicating constant inputs with true
     * @param newNofinputs New number of inputs of the dataset; it must be
     *        the number of inputs not marked in toRemove
     * @throws IllegalArgumentException If newNofinputs is not the number of
     *         inputs left unmarked in toRemove
     */
    public void removeInputs(boolean[] toRemove, int newNofinputs) {
        int oldNofinputs = nofinputs;
        int oldNofvariables = nofvariables;

        //Validate before touching the object: a wrong count would misplace
        //the outputs or overflow the new matrix
        int kept = 0;
        for (int i = 0; i < oldNofinputs; i++) {
            if (!toRemove[i]) {
                kept++;
            }
        }
        if (kept != newNofinputs) {
            throw new IllegalArgumentException("newNofinputs (" + newNofinputs
                    + ") does not match the number of inputs kept (" + kept + ")");
        }

        //Keep the current matrix; setNofinputs allocates a new one
        double[][] previous = array;

        //Remove inputs
        setNofinputs(newNofinputs);

        //Copy the kept inputs and every output
        for (int i = 0, j = 0; i < oldNofvariables; i++) {
            if (i >= oldNofinputs || !toRemove[i]) {
                System.arraycopy(previous[i], 0, this.array[j], 0, nofobservations);
                j++;
            }
        }
    }

    /////////////////////////////////////////////////////////////////
    // ---------------------------------------------- Private methods
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Init the array stored in the DataSet: updates the number of variables
     * and allocates a new, zero-filled matrix
     * </p>
     */
    private void init() {
        setNofvariables(nofinputs + nofoutputs);
        array = new double[nofvariables][nofobservations];
    }

    /**
     * <p>
     * Returns how many 0/1 variables encode a categorical attribute:
     * one per category, except two-category inputs, which use one
     * </p>
     * @param attribute Categorical attribute
     * @param role Schema code of the attribute
     * @return int Number of variables used by the attribute
     */
    private static int encodedCategories(CategoricalAttribute attribute, byte role) {
        int numberCategories = attribute.getNumberCategories();
        if (numberCategories == 2 && role == SCHEMA_INPUT) {
            return 1;
        }
        return numberCategories;
    }

    /**
     * <p>
     * Reads the next line and splits it into whitespace separated tokens.
     * Leading, trailing and repeated separators are tolerated.
     * </p>
     * @param reader Reader to read the line from
     * @return String [] Tokens of the line
     * @throws EOFException If there are no more lines to read
     * @throws IOException If the line cannot be read
     */
    private static String[] readTokens(BufferedReader reader) throws IOException {
        String line = reader.readLine();
        //readLine signals the end of the file with null, not an exception
        if (line == null) {
            throw new EOFException("Unexpected end of file");
        }
        return line.trim().split("\\s+");
    }

    /**
     * <p>
     * Obtain the largest (Euclidean) distance between the input data of any
     * two observations and store it in maximumDistance
     * </p>
     */
    private void obtainMaximumDistance() {
        final int observations = getNofobservations();
        final int inputs = getNofinputs();

        //Squared distances are compared and the square root is taken once
        //at the end; the square root is monotonic, so the result is the same
        double maximumSquared = 0;

        for (int i = 0; i < observations; i++) {
            for (int j = i + 1; j < observations; j++) {

                double squared = 0;

                // Calculate the distance between two data, reading the
                // matrix directly instead of copying both observations
                for (int k = 0; k < inputs; k++) {
                    squared += Math.pow(array[k][i] - array[k][j], 2.0);
                }

                // Get the largest distances
                if (squared > maximumSquared) {
                    maximumSquared = squared;
                }
            }
        }

        maximumDistance = Math.sqrt(maximumSquared);
    }

    /////////////////////////////////////////////////////////////////
    // ---------------------------- Implementing IConfigure interface
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Configuration parameters for this data set are:
     * </p>
     * <ul>
     * <li>
     * <code>[@file-name] (String)</code>
     *  File name. Name of the file that stores the necessary information
     *             for this data set.
     * </li>
     * </ul>
     * @param settings Configuration object from which the properties are going to be read
     */
    public void configure(Configuration settings) {
        // ----------------------------------------- Setup fileName
        fileName = settings.getString("[@file-name]");
    }
}