Java tutorial
/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    F. Herrera (herrera@decsai.ugr.es)
    L. Sanchez (luciano@uniovi.es)
    J. Alcala-Fdez (jalcala@decsai.ugr.es)
    S. Garcia (sglopez@ujaen.es)
    A. Fernandez (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Neural_Networks.NNEP_Common.data;

import java.io.BufferedReader;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

import net.sf.jclec.IConfigure;

import org.apache.commons.configuration.Configuration;

/**
 * <p>
 * Set of data of a problem, stored transposed: the backing matrix is indexed
 * as <code>array[variable][observation]</code>. The first
 * <code>nofinputs</code> rows hold the input variables and the following
 * <code>nofoutputs</code> rows hold the output variables.
 * </p>
 * <p>
 * The matrix is (re)allocated, and its previous contents discarded, every time
 * one of the three dimensions (observations, inputs, outputs) is set while the
 * other two are already known.
 * </p>
 *
 * @author Written by Pedro Antonio Gutierrez Penna, Aaron Ruiz Mora (University of Cordoba) 17/07/2007
 * @version 0.1
 * @since JDK1.5
 */
public class DoubleTransposedDataSet implements IConfigure {

    /////////////////////////////////////////////////////////////////
    // --------------------------------------- Serialization constant
    /////////////////////////////////////////////////////////////////

    /** Generated by Eclipse */
    private static final long serialVersionUID = -7161371989002786655L;

    /////////////////////////////////////////////////////////////////
    // --------------------------------------------------- Attributes
    /////////////////////////////////////////////////////////////////

    /** Name of the file to extract the observations from */
    String fileName;

    /** Number of observations (matrix columns); -1 while unset */
    protected int nofobservations = -1;

    /** Number of variables (matrix rows), nofvariables = nofinputs + nofoutputs; -1 while unset */
    protected int nofvariables = -1;

    /** Number of inputs; -1 while unset */
    protected int nofinputs = -1;

    /** Number of outputs; -1 while unset */
    protected int nofoutputs = -1;

    /** Array with all data, indexed as [variable][observation] */
    protected double[][] array;

    /** Array with the mean of each output */
    protected double[] outputMeans;

    /** Maximum distance between data */
    protected double maximumDistance;

    /////////////////////////////////////////////////////////////////
    // -------------------------------------------------- Constructor
    /////////////////////////////////////////////////////////////////

    /**
     * Empty constructor
     */
    public DoubleTransposedDataSet() {
        super();
    }

    /////////////////////////////////////////////////////////////////
    // ------------------------------- Getting and setting attributes
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Returns the filename used to read the observations and parameters
     * </p>
     * @return String Filename
     */
    public String getFileName() {
        return fileName;
    }

    /**
     * <p>
     * Sets the filename used to read the observations and parameters
     * </p>
     * @param fileName Filename
     */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /**
     * <p>
     * Returns the number of inputs of the observations stored in the data set
     * </p>
     * @return int Number of inputs
     */
    public int getNofinputs() {
        return nofinputs;
    }

    /**
     * <p>
     * Sets the number of inputs of the observations stored in the data set.
     * If the number of outputs and observations are already set, the data
     * matrix is reallocated and its previous contents are lost.
     * </p>
     * @param nofinputs New number of inputs
     */
    public void setNofinputs(int nofinputs) {
        this.nofinputs = nofinputs;
        if (nofoutputs != -1 && nofobservations != -1) {
            init();
        }
    }

    /**
     * <p>
     * Returns the number of observations stored in the data set
     * </p>
     * @return int Number of observations
     */
    public int getNofobservations() {
        return nofobservations;
    }

    /**
     * <p>
     * Sets the number of observations stored in the data set.
     * If the number of inputs and outputs are already set, the data
     * matrix is reallocated and its previous contents are lost.
     * </p>
     * @param nofobservations New number of observations
     */
    public void setNofobservations(int nofobservations) {
        this.nofobservations = nofobservations;
        if (nofoutputs != -1 && nofinputs != -1) {
            init();
        }
    }

    /**
     * <p>
     * Returns the number of variables stored in the data set
     * </p>
     * @return int Number of variables
     */
    public int getNofvariables() {
        return nofvariables;
    }

    /**
     * <p>
     * Sets the number of variables stored in the data set.
     * This only updates the counter; the data matrix is not resized.
     * </p>
     * @param nofvariables New number of variables
     */
    public void setNofvariables(int nofvariables) {
        this.nofvariables = nofvariables;
    }

    /**
     * <p>
     * Returns the number of outputs of the observations stored in the data set
     * </p>
     * @return int Number of outputs
     */
    public int getNofoutputs() {
        return nofoutputs;
    }

    /**
     * <p>
     * Sets the number of outputs of the observations stored in the data set.
     * If the number of inputs and observations are already set, the data
     * matrix is reallocated and its previous contents are lost.
     * </p>
     * @param nofoutputs New number of outputs
     */
    public void setNofoutputs(int nofoutputs) {
        this.nofoutputs = nofoutputs;
        if (nofinputs != -1 && nofobservations != -1) {
            init();
        }
    }

    /**
     * <p>
     * Returns a specified observation (a copy of one matrix column)
     * </p>
     * @param nofobservation Number of observation to return
     * @return double [] Array with the specified observation
     */
    public double[] getObservation(int nofobservation) {
        double[] observation = new double[nofvariables];
        for (int i = 0; i < nofvariables; i++) {
            observation[i] = array[i][nofobservation];
        }
        return observation;
    }

    /**
     * <p>
     * Sets a specified observation (copies the values into one matrix column)
     * </p>
     * @param nofobservation Number of observation
     * @param observation New observation
     */
    public void setObservation(int nofobservation, double[] observation) {
        for (int i = 0; i < nofvariables; i++) {
            array[i][nofobservation] = observation[i];
        }
    }

    /**
     * <p>
     * Returns the outputs of a specified observation
     * </p>
     * @param nofobservation Number of the observation
     * @return double [] Array with the outputs of the observation
     */
    public double[] getOutputs(int nofobservation) {
        double[] outputs = new double[nofoutputs];
        for (int i = nofinputs; i < nofvariables; i++) {
            outputs[i - nofinputs] = array[i][nofobservation];
        }
        return outputs;
    }

    /**
     * <p>
     * Returns a matrix with all the outputs of the dataSet
     * in rows. The rows are the internal rows of the data set, not copies.
     * </p>
     * @return double [][] Matrix with all the outputs of the dataSet
     */
    public double[][] getAllOutputs() {
        // Only the outer array is allocated: every row is replaced by a
        // reference to the internal row, so allocating the rows is wasted work.
        double[][] outputs = new double[nofoutputs][];
        for (int i = nofinputs; i < nofvariables; i++) {
            outputs[i - nofinputs] = array[i];
        }
        return outputs;
    }

    /**
     * <p>
     * Returns the inputs of a specified observation
     * </p>
     * @param nofobservation Number of the observation
     * @return double [] Array with the inputs of the observation
     */
    public double[] getInputs(int nofobservation) {
        double[] inputs = new double[nofinputs];
        for (int i = 0; i < nofinputs; i++) {
            inputs[i] = array[i][nofobservation];
        }
        return inputs;
    }

    /**
     * <p>
     * Returns a matrix with all the inputs of the dataSet
     * in rows. The rows are the internal rows of the data set, not copies.
     * </p>
     * @return double [][] Matrix with all the inputs of the dataSet
     */
    public double[][] getAllInputs() {
        // Only the outer array is allocated (see getAllOutputs)
        double[][] inputs = new double[nofinputs][];
        for (int i = 0; i < nofinputs; i++) {
            inputs[i] = array[i];
        }
        return inputs;
    }

    /**
     * <p>
     * Returns all the values of an output in the data set
     * (the internal row, not a copy)
     * </p>
     * @param nofoutput Number of the output
     * @return double [] Array with all the values of the output
     */
    public double[] getOutput(int nofoutput) {
        return array[nofinputs + nofoutput];
    }

    /**
     * <p>
     * Sets all the values of an output in the data set
     * (the given array is stored as is, not copied)
     * </p>
     * @param nofoutput Number of the output
     * @param values Double array with all the values of the output
     */
    public void setOutput(int nofoutput, double[] values) {
        array[nofinputs + nofoutput] = values;
    }

    /**
     * <p>
     * Returns all the values of a variable in the data set
     * (the internal row, not a copy)
     * </p>
     * @param nofvariable Number of the variable
     * @return double [] Array with all the values of the variable
     */
    public double[] getObservationsOf(int nofvariable) {
        return array[nofvariable];
    }

    /**
     * <p>
     * Sets all the values of a variable in the data set
     * (the given array is stored as is, not copied)
     * </p>
     * @param nofvariable Number of the variable
     * @param values Double array with all the values of the variable
     */
    public void setObservationsOf(int nofvariable, double[] values) {
        array[nofvariable] = values;
    }

    /**
     * <p>
     * Returns the mean of a specific output, as computed by the last call to
     * {@link #calculateMeans()}
     * </p>
     * @param index Number of output mean to return
     * @return double Output mean
     */
    public double getOutputMean(int index) {
        return outputMeans[index];
    }

    /**
     * <p>
     * Returns the maximum value of a specific variable
     * </p>
     * @param index Number of variable maximum value to return
     * @return double Maximum value
     */
    public double getMaxValueOf(int index) {
        double[] row = array[index];
        double max = row[0];
        // Element 0 is already the initial candidate
        for (int i = 1; i < nofobservations; i++) {
            if (row[i] > max) {
                max = row[i];
            }
        }
        return max;
    }

    /**
     * <p>
     * Returns the minimum value of a specific variable
     * </p>
     * @param index Number of variable minimum value to return
     * @return double Minimum value
     */
    public double getMinValueOf(int index) {
        double[] row = array[index];
        double min = row[0];
        // Element 0 is already the initial candidate
        for (int i = 1; i < nofobservations; i++) {
            if (row[i] < min) {
                min = row[i];
            }
        }
        return min;
    }

    /**
     * <p>
     * Returns the maximum distance between train data
     * </p>
     * @return double Maximum distance
     */
    public double getMaximumDistance() {
        return maximumDistance;
    }

    /**
     * <p>
     * Sets the maximum distance between train data
     * </p>
     * @param maximumDistance New maximum distance
     */
    public void setMaximumDistance(double maximumDistance) {
        this.maximumDistance = maximumDistance;
    }

    /////////////////////////////////////////////////////////////////
    // ----------------------------------------------- Public methods
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Init the DoubleTransposedDataSet using a normal IDataset.
     * </p>
     * <p>
     * Attribute <code>i</code> is an input when <code>schema[i] == 1</code>,
     * an output when <code>schema[i] == 2</code> and is not stored otherwise.
     * A categorical attribute is expanded into one 0/1 column per category
     * (the column for category <code>j</code>, counted from 1, is 1 when the
     * instance value equals <code>j</code>); a categorical input with exactly
     * two categories uses a single column. Instances containing a NaN value
     * in any attribute are dropped, and the number of observations is reduced
     * accordingly. Output means and maximum distance are computed at the end.
     * </p>
     * @param schema Schema of the dataset
     * @param dataset Dataset to read data of
     * @throws DatasetException Propagated from the dataset operations
     */
    public void read(byte[] schema, IDataset dataset) throws DatasetException {

        // Open dataset
        dataset.open();

        // Reads number of observations
        setNofobservations(dataset.numberOfInstances());

        // Reads number of inputs and outputs
        int nOfInputs = 0;
        int nOfOutputs = 0;
        for (int i = 0; i < schema.length; i++) {
            if (schema[i] == 1) {
                nOfInputs++;
            } else if (schema[i] == 2) {
                nOfOutputs++;
            }
        }

        // Metadata
        IMetadata metadata = dataset.getMetadata();

        // Categorical attributes need one column per category
        // (one of them has already been counted above)
        for (int i = 0; i < metadata.numberOfAttributes(); i++) {
            if (metadata.getAttribute(i).getType() == AttributeType.Categorical) {
                int numberCategories =
                        ((CategoricalAttribute) metadata.getAttribute(i)).getNumberCategories();
                if (numberCategories == 2 && schema[i] == 1) {
                    numberCategories = 1;
                }
                if (schema[i] == 1) {
                    nOfInputs += (numberCategories - 1);
                } else if (schema[i] == 2) {
                    nOfOutputs += (numberCategories - 1);
                }
            }
        }

        // Sets number of outputs and inputs (this allocates the matrix)
        setNofinputs(nOfInputs);
        setNofoutputs(nOfOutputs);

        // For each instance
        int instanceCounter = 0;
        while (dataset.next()) {
            IDataset.IInstance instancia = dataset.read();
            boolean lostValues = false;
            int inputCounter = 0;
            int outputCounter = 0;

            for (int i = 0; i < schema.length; i++) {
                double value = instancia.getValue(i);
                if (Double.isNaN(value)) {
                    lostValues = true;
                }

                if (metadata.getAttribute(i).getType() != AttributeType.Categorical) {
                    if (schema[i] == 1) {
                        array[inputCounter++][instanceCounter] = value;
                    } else if (schema[i] == 2) {
                        array[nOfInputs + (outputCounter++)][instanceCounter] = value;
                    }
                } else {
                    CategoricalAttribute attribute =
                            (CategoricalAttribute) metadata.getAttribute(i);
                    int numberCategories = attribute.getNumberCategories();
                    if (numberCategories == 2 && schema[i] == 1) {
                        numberCategories = 1;
                    }

                    if (schema[i] == 1) {
                        for (int j = 1; j <= numberCategories; j++) {
                            array[inputCounter++][instanceCounter] = ((value == j) ? 1 : 0);
                        }
                    } else if (schema[i] == 2) {
                        for (int j = 1; j <= numberCategories; j++) {
                            array[nOfInputs + (outputCounter++)][instanceCounter] =
                                    ((value == j) ? 1 : 0);
                        }
                    }
                }
            }

            // An instance with lost values keeps its column index, so the
            // next instance overwrites it
            if (!lostValues) {
                instanceCounter++;
            }
        }

        // Shrink the matrix if some instances were dropped
        if (instanceCounter < nofobservations) {
            nofobservations = instanceCounter;
            double[][] auxArray = array;
            array = new double[nofvariables][nofobservations];
            for (int i = 0; i < array.length; i++) {
                System.arraycopy(auxArray[i], 0, array[i], 0, array[i].length);
            }
        }

        dataset.close();

        calculateMeans();
    }

    /**
     * <p>
     * Init the array stored in the DataSet reading the text file
     * <code>fileName</code>.
     * </p>
     * <p>
     * Expected format (fields separated by spaces or tabs):
     * </p>
     * <ul>
     * <li>Line 1: number of observations, number of inputs, number of outputs.</li>
     * <li>Line 2: schema, one byte per column: 1 = input, 2 = output,
     *     any other value = column not stored.</li>
     * <li>One line per observation with at least as many values as the schema.</li>
     * </ul>
     * <p>
     * Output means and maximum distance are computed after a successful read.
     * </p>
     * @throws IOException If the file cannot be found or read, ends too soon,
     *         or does not follow the expected format
     * @throws NumberFormatException If a field cannot be parsed as a number
     */
    public void read() throws IOException, NumberFormatException {

        BufferedReader reader = null;

        try {
            reader = new BufferedReader(new FileReader(fileName));

            //------------------//
            //Reading first line//
            //------------------//

            String[] values = splitFields(readRequiredLine(reader));

            // Check the text format
            if (values.length < 3) {
                throw new IOException("Illegal Text Format");
            }

            int observations = Integer.parseInt(values[0]);
            int inputs = Integer.parseInt(values[1]);
            int outputs = Integer.parseInt(values[2]);

            // Negative sizes cannot be allocated (and -1 means "unset")
            if (observations < 0 || inputs < 0 || outputs < 0) {
                throw new IOException("Illegal Text Format");
            }

            setNofobservations(observations);
            setNofinputs(inputs);
            setNofoutputs(outputs);

            //-------------------//
            //Reading second line//
            //-------------------//

            values = splitFields(readRequiredLine(reader));

            // Reads the input schema array, counting inputs and outputs
            byte[] iSchema = new byte[values.length];
            int schemaInputs = 0;
            int schemaOutputs = 0;
            for (int i = 0; i < iSchema.length; i++) {
                iSchema[i] = Byte.parseByte(values[i]);
                if (iSchema[i] == 1) {
                    schemaInputs++;
                } else if (iSchema[i] == 2) {
                    schemaOutputs++;
                }
            }

            // Check the text format: inputs and outputs must match the header
            // separately, otherwise values would land in the wrong rows
            if (schemaInputs != nofinputs || schemaOutputs != nofoutputs) {
                throw new IOException("Illegal Text Format");
            }

            //-------------------//
            //Reading other lines//
            //-------------------//

            for (int i = 0; i < nofobservations; i++) {

                values = splitFields(readRequiredLine(reader));

                // Check the text format
                if (values.length < iSchema.length) {
                    throw new IOException("Illegal Text Format");
                }

                int inputRow = 0;          // next input row to fill
                int outputRow = nofinputs; // next output row to fill

                for (int j = 0; j < iSchema.length; j++) {
                    double value = Double.parseDouble(values[j]);

                    if (iSchema[j] == 1) {
                        array[inputRow++][i] = value;
                    } else if (iSchema[j] == 2) {
                        array[outputRow++][i] = value;
                    }
                    // Otherwise the value is ignored
                }
            }
        } catch (EOFException e) {
            System.out.println("Illegal Text Format");
            IOException formatError = new IOException("Illegal Text Format");
            formatError.initCause(e);
            throw formatError;
        } catch (NumberFormatException e) {
            System.out.println("Number format exception");
            throw e;
        } catch (FileNotFoundException e) {
            System.out.println("File not found");
            throw e;
        } finally {
            closeQuietly(reader);
        }

        calculateMeans();
    }

    /**
     * <p>
     * Obtain the means of all the outputs and the maximum distance
     * between the input data
     * </p>
     */
    public void calculateMeans() {

        // The number of outputs may have changed since the last call
        if (outputMeans == null || outputMeans.length != nofoutputs) {
            outputMeans = new double[nofoutputs];
        }

        // Obtain the means of the outputs
        for (int j = 0; j < nofoutputs; j++) {
            double[] row = array[nofinputs + j];
            double sum = 0;
            for (int i = 0; i < nofobservations; i++) {
                sum += row[i];
            }
            outputMeans[j] = sum / nofobservations;
        }

        // Obtain the maximum distance between data
        obtainMaximumDistance();
    }

    /**
     * <p>
     * Returns a string representation of the DataSet
     * </p>
     * @return String Representation of the DataSet
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();

        sb.append("<DataSet>\n");
        sb.append("<nofobservations>").append(nofobservations).append("</nofobservations>\n");
        sb.append("<nofinputs>").append(nofinputs).append("</nofinputs>\n");
        sb.append("<nofoutputs>").append(nofoutputs).append("</nofoutputs>\n");
        sb.append("<observations>\n");
        for (int i = 0; i < nofobservations; i++) {
            for (int j = 0; j < nofvariables; j++) {
                sb.append(array[j][i]).append(" ");
            }
            sb.append("\n");
        }
        sb.append("</observations>\n");
        sb.append("</DataSet>");

        return sb.toString();
    }

    /**
     * <p>
     * Returns a copy of the DataSet. The data matrix is copied and the
     * output means and maximum distance are recalculated on the copy.
     * </p>
     * @return DataSet Copy of the DataSet
     */
    public DoubleTransposedDataSet copy() {

        DoubleTransposedDataSet result = new DoubleTransposedDataSet();

        // Set the fileName
        result.fileName = this.fileName;

        // Copy the dimensions (the last setter allocates the matrix)
        result.setNofobservations(this.nofobservations);
        result.setNofinputs(this.nofinputs);
        result.setNofoutputs(this.nofoutputs);

        // Copy the array
        for (int i = 0; i < nofvariables; i++) {
            System.arraycopy(this.array[i], 0, result.array[i], 0, nofobservations);
        }

        // Calculate means
        result.calculateMeans();

        return result;
    }

    /**
     * <p>
     * Obtain a boolean array with true at these inputs that are constants
     * </p>
     * @return boolean [] Constant inputs
     */
    public boolean[] obtainConstantsInputs() {
        boolean[] toRemove = new boolean[nofinputs];

        for (int i = 0; i < nofinputs; i++) {
            double value = array[i][0];
            int j = 1;
            while (j < nofobservations && array[i][j] == value) {
                j++;
            }
            // The scan reached the end only if every value equals the first
            toRemove[i] = (j == nofobservations);
        }

        return toRemove;
    }

    /**
     * <p>
     * Remove the inputs desired. Outputs are kept. The maximum distance is
     * not recalculated here; call {@link #calculateMeans()} afterwards if
     * the removed inputs were not constant.
     * </p>
     * @param toRemove Array of Boolean indicating constant inputs with true
     * @param newNofinputs New number of inputs of the dataset
     * @throws IllegalArgumentException If <code>toRemove</code> is shorter than
     *         the current number of inputs, or <code>newNofinputs</code> is not
     *         the number of inputs left unmarked in <code>toRemove</code>
     */
    public void removeInputs(boolean[] toRemove, int newNofinputs) {

        // Keep the old matrix: setNofinputs() below allocates a new one
        final double[][] previous = this.array;
        final int previousInputs = this.nofinputs;
        final int previousVariables = this.nofvariables;

        // Validate before touching any state
        if (toRemove.length < previousInputs) {
            throw new IllegalArgumentException(
                    "toRemove has " + toRemove.length + " entries, expected " + previousInputs);
        }
        int kept = 0;
        for (int i = 0; i < previousInputs; i++) {
            if (!toRemove[i]) {
                kept++;
            }
        }
        if (kept != newNofinputs) {
            throw new IllegalArgumentException(
                    "newNofinputs is " + newNofinputs + " but toRemove keeps " + kept + " inputs");
        }

        // Remove inputs
        setNofinputs(newNofinputs);

        // Copy the kept inputs and all the outputs
        int target = 0;
        for (int i = 0; i < previousVariables; i++) {
            if (i >= previousInputs || !toRemove[i]) {
                System.arraycopy(previous[i], 0, this.array[target], 0, nofobservations);
                target++;
            }
        }
    }

    /////////////////////////////////////////////////////////////////
    // ---------------------------------------------- Private methods
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Init the array stored in the DataSet
     * </p>
     */
    private void init() {
        setNofvariables(nofinputs + nofoutputs);
        array = new double[nofvariables][nofobservations];
    }

    /**
     * <p>
     * Obtain the largest (Euclidean) distance between the input data
     * </p>
     */
    private void obtainMaximumDistance() {
        // Compare squared distances and take a single square root at the end:
        // sqrt is monotonic, so the maximum is the same
        double maxSquared = 0;

        for (int i = 0; i < nofobservations; i++) {
            for (int j = i + 1; j < nofobservations; j++) {
                double squared = 0;
                // Read the matrix directly instead of building an input
                // vector for every single component
                for (int k = 0; k < nofinputs; k++) {
                    double diff = array[k][i] - array[k][j];
                    squared += diff * diff;
                }
                if (squared > maxSquared) {
                    maxSquared = squared;
                }
            }
        }

        maximumDistance = Math.sqrt(maxSquared);
    }

    /**
     * Reads the next line, failing if the end of the file has been reached.
     *
     * @param reader Reader to read from
     * @return String Line read (never null)
     * @throws IOException If reading fails; EOFException if there are no more lines
     */
    private static String readRequiredLine(BufferedReader reader) throws IOException {
        String line = reader.readLine();
        if (line == null) {
            // readLine() reports the end of the stream with null, not an exception
            throw new EOFException("Unexpected end of file");
        }
        return line;
    }

    /**
     * Splits a line into fields separated by runs of spaces or tabs,
     * ignoring leading and trailing whitespace.
     *
     * @param line Line to split
     * @return String [] Fields of the line
     */
    private static String[] splitFields(String line) {
        return line.trim().split("\\s+");
    }

    /**
     * Closes the reader (if any), ignoring failures.
     *
     * @param reader Reader to close, may be null
     */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing was written; a failure closing a read-only stream must
            // not hide the outcome of the read itself
        }
    }

    /////////////////////////////////////////////////////////////////
    // ---------------------------- Implementing IConfigure interface
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Configuration parameters for this data set are:
     * </p>
     * <ul>
     * <li>
     * <code>[@file-name] (String)</code>
     * File name. Name of the file that stores the necessary information
     * for this data set.
     * </li>
     * </ul>
     * @param settings Configuration object from which the properties are going to be read
     */
    public void configure(Configuration settings) {
        // ----------------------------------------- Setup fileName
        fileName = settings.getString("[@file-name]");
    }
}