at.tuwien.ifs.somtoolbox.models.GrowingSOM.java Source code

Java tutorial

Introduction

Here is the source code for at.tuwien.ifs.somtoolbox.models.GrowingSOM.java

Source

/*
 * Copyright 2004-2010 Information & Software Engineering Group (188/1)
 *                     Institute of Software Technology and Interactive Systems
 *                     Vienna University of Technology, Austria
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.tuwien.ifs.somtoolbox.models;

import java.io.IOException;
import java.util.Properties;
import java.util.logging.Logger;

import org.apache.commons.lang.NotImplementedException;

import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;

import at.tuwien.ifs.somtoolbox.SOMToolboxException;
import at.tuwien.ifs.somtoolbox.apps.SOMToolboxApp;
import at.tuwien.ifs.somtoolbox.apps.config.AbstractOptionFactory;
import at.tuwien.ifs.somtoolbox.apps.config.OptionFactory;
import at.tuwien.ifs.somtoolbox.data.InputData;
import at.tuwien.ifs.somtoolbox.data.SOMVisualisationData;
import at.tuwien.ifs.somtoolbox.data.SharedSOMVisualisationData;
import at.tuwien.ifs.somtoolbox.input.SOMInputReader;
import at.tuwien.ifs.somtoolbox.input.SOMLibDataWinnerMapping;
import at.tuwien.ifs.somtoolbox.input.SOMLibFormatInputReader;
import at.tuwien.ifs.somtoolbox.layers.GrowingLayer;
import at.tuwien.ifs.somtoolbox.layers.Layer.GridTopology;
import at.tuwien.ifs.somtoolbox.layers.LayerAccessException;
import at.tuwien.ifs.somtoolbox.layers.ToroidLayer;
import at.tuwien.ifs.somtoolbox.layers.TrainingInterruptionListener;
import at.tuwien.ifs.somtoolbox.layers.Unit;
import at.tuwien.ifs.somtoolbox.layers.quality.QualityMeasure;
import at.tuwien.ifs.somtoolbox.output.HTMLOutputter;
import at.tuwien.ifs.somtoolbox.output.SOMLibMapOutputter;
import at.tuwien.ifs.somtoolbox.output.labeling.AbstractLabeler;
import at.tuwien.ifs.somtoolbox.output.labeling.Labeler;
import at.tuwien.ifs.somtoolbox.properties.FileProperties;
import at.tuwien.ifs.somtoolbox.properties.GHSOMProperties;
import at.tuwien.ifs.somtoolbox.properties.PropertiesException;
import at.tuwien.ifs.somtoolbox.properties.SOMProperties;
import at.tuwien.ifs.somtoolbox.util.StdErrProgressWriter;

/**
 * This class implements the Growing Self-Organizing Map. It is basically a wrapper for the
 * {@link at.tuwien.ifs.somtoolbox.layers.GrowingLayer} and mainly handles command line execution and parameters. It
 * implements the {@link at.tuwien.ifs.somtoolbox.models.NetworkModel} interface, which is currently not used but may
 * be used in the future.
 * 
 * @author Michael Dittenbach
 * @version $Id: GrowingSOM.java 3883 2010-11-02 17:13:23Z frank $
 */
public class GrowingSOM extends AbstractNetworkModel implements SOMToolboxApp {

    /**
     * Command-line parameters understood by {@link #main(String[])}, which passes this array to
     * <code>OptionFactory.parseResults</code>. NOTE(review): the boolean handed to each factory method presumably
     * marks the option as mandatory (only the properties option uses <code>true</code>) - confirm against
     * <code>OptionFactory</code>.
     */
    public static final Parameter[] OPTIONS = new Parameter[] { OptionFactory.getSwitchHtmlOutput(false),
            OptionFactory.getOptLabeling(false), OptionFactory.getOptNumberLabels(false),
            OptionFactory.getOptWeightVectorFileInit(false), OptionFactory.getOptMapDescriptionFile(false),
            OptionFactory.getSwitchSkipDataWinnerMapping(), OptionFactory.getOptNumberWinners(false),
            OptionFactory.getOptProperties(true), OptionFactory.getOptUseMultiCPU(false) };

    /** Category of this application: training. */
    public static final Type APPLICATION_TYPE = Type.Training;

    /** Short, one-sentence description of this application. */
    public static String DESCRIPTION = "Provides a Growing Grid and a static SOM (with the growth parameters disabled)";

    // TODO: Long_Description
    // NOTE(review): "rows or cells" in the text below possibly should read "rows or columns" - confirm before changing
    // the string, as it may be shown to users.
    /** Longer description of this application. */
    public static String LONG_DESCRIPTION = "Provides a Growing Grid (Bernd Fritzke, 1995), i.e. a Self-Organising Map that can dynamically grow by inserting whole rows or cells. When setting the growth-controlling parameters to disable growth, it can also be used to train a standard SOM.";

    /**
     * Method for stand-alone execution of map training: parses the command line, loads the properties and input
     * data, trains (or further trains) the map, optionally labels it, and writes the result files. Options are:<br/>
     * <ul>
     * <li>-h toggles HTML output</li>
     * <li>-l name of class implementing the labeling algorithm</li>
     * <li>-n number of labels to generate</li>
     * <li>-w name of weight vector file in case of training an already trained map</li>
     * <li>-m name of map description file in case of training an already trained map</li>
     * <li>--noDWM switch to not write the data winner mapping file</li>
     * <li>numberWinners: number of winners handed to the data winner mapping writer; capped at
     * <code>xSize * ySize</code> of the trained layer</li>
     * <li>cpus: number of CPUs handed to the layer; values above twice the number of available processors are
     * reduced to that limit</li>
     * <li>properties name of properties file, mandatory</li>
     * </ul>
     * Any failure to read the properties, restore a trained map, instantiate the labeler or write an output file is
     * logged and terminates the JVM with exit code -1.
     * 
     * @param args the execution arguments as stated above.
     */
    public static void main(String[] args) {
        final Logger log = Logger.getLogger("at.tuwien.ifs.somtoolbox");
        final String networkModelName = "GrowingSOM";

        InputData data = null;
        FileProperties fileProps = null;
        GrowingSOM som = null;
        SOMProperties somProps = null;

        // register and parse all options
        JSAPResult config = OptionFactory.parseResults(args, OPTIONS);

        log.info("starting " + networkModelName);

        int cpus = config.getInt("cpus", 1);
        int systemCPUs = Runtime.getRuntime().availableProcessors();
        // Requesting more CPUs than available only triggers a warning; the request is capped once it exceeds
        // twice the number of available processors.
        if (cpus > systemCPUs) {
            String msg = "Number of CPUs required exceeds number of CPUs available.";
            if (cpus > 2 * systemCPUs) {
                cpus = 2 * systemCPUs;
                msg += " Limiting to twice the number of available processors: " + cpus;
            }
            log.warning(msg);
        }
        GrowingLayer.setNO_CPUS(cpus);

        String propFileName = AbstractOptionFactory.getFilePath(config, "properties");
        String weightFileName = AbstractOptionFactory.getFilePath(config, "weightVectorFile");
        String mapDescFileName = AbstractOptionFactory.getFilePath(config, "mapDescriptionFile");
        String labelerName = config.getString("labeling", null);
        int numLabels = config.getInt("numberLabels", DEFAULT_LABEL_COUNT);
        boolean skipDataWinnerMapping = config.getBoolean("skipDataWinnerMapping", false);
        Labeler labeler = null;
        // TODO: use parameter for max
        int numWinners = config.getInt("numberWinners", SOMLibDataWinnerMapping.MAX_DATA_WINNERS);

        if (labelerName != null) { // a labeler was requested, so it must be instantiable before we start training
            try {
                labeler = AbstractLabeler.instantiate(labelerName);
                log.info("Instantiated labeler " + labelerName);
            } catch (Exception e) {
                log.severe("Could not instantiate labeler \"" + labelerName + "\".");
                System.exit(-1);
            }
        }

        if (weightFileName == null) {
            log.info("Training a new SOM.");
        } else {
            log.info("Further training of an already trained SOM.");
        }

        try {
            fileProps = new FileProperties(propFileName);
            somProps = new SOMProperties(propFileName);
        } catch (PropertiesException e) {
            log.severe(e.getMessage() + " Aborting.");
            System.exit(-1);
        }

        data = getInputData(fileProps);

        if (weightFileName == null) {
            som = new GrowingSOM(data.isNormalizedToUnitLength(), somProps, data);
        } else {
            try {
                som = new GrowingSOM(new SOMLibFormatInputReader(weightFileName, null, mapDescFileName));
            } catch (Exception e) {
                log.severe(e.getMessage() + " Aborting.");
                System.exit(-1);
            }
        }

        // the dumper has to be registered before training so that it is notified while training runs
        if (somProps.getDumpEvery() > 0) {
            IntermediateSOMDumper dumper = som.new IntermediateSOMDumper(fileProps);
            som.layer.setTrainingInterruptionListener(dumper, somProps.getDumpEvery());
        }

        // setting input data so it is accessible by map output
        som.setSharedInputObjects(new SharedSOMVisualisationData(null, null, null, null,
                fileProps.vectorFileName(true), fileProps.templateFileName(true), null));
        som.getSharedInputObjects().setData(SOMVisualisationData.INPUT_VECTOR, data);

        som.train(data, somProps);

        if (labeler != null) { // non-null exactly when a labeler name was given (instantiation failure exits above)
            labeler.label(som, data, numLabels);
        }

        try {
            SOMLibMapOutputter.write(som, fileProps.outputDirectory(), fileProps.namePrefix(false), true, somProps,
                    fileProps);
        } catch (IOException e) { // TODO: create new exception type
            log.severe("Could not open or write to output file " + fileProps.namePrefix(false) + ": "
                    + e.getMessage());
            System.exit(-1);
        }

        if (skipDataWinnerMapping) {
            log.info("Skipping writing data winner mapping file");
        } else {
            // never ask for more winners than the layer's xSize * ySize
            numWinners = Math.min(numWinners, som.getLayer().getXSize() * som.getLayer().getYSize());
            try {
                SOMLibMapOutputter.writeDataWinnerMappingFile(som, data, numWinners, fileProps.outputDirectory(),
                        fileProps.namePrefix(false), true);
            } catch (IOException e) {
                log.severe("Could not open or write to output file " + fileProps.namePrefix(false) + ": "
                        + e.getMessage());
                System.exit(-1);
            }
        }

        if (config.getBoolean("htmlOutput")) {
            try {
                new HTMLOutputter().write(som, fileProps.outputDirectory(), fileProps.namePrefix(false));
            } catch (IOException e) { // TODO: create new exception type
                log.severe("Could not open or write to output file " + fileProps.namePrefix(false) + ": "
                        + e.getMessage());
                System.exit(-1);
            }
        }

        log.info("finished " + networkModelName + " (" + som.getLayer().getGridLayout() + ", "
                + som.getLayer().getGridTopology() + ")");
    }

    /**
     * The map layer wrapped by this model. It is assigned by the constructors; it stays <code>null</code> when the
     * protected no-argument constructor is used until a subclass (or {@link #clone()}) sets it.
     */
    protected GrowingLayer layer = null;

    /**
     * Constructs and trains a new <code>GrowingSOM</code> configured entirely by the given properties: file and SOM
     * properties are derived from <code>properties</code>, the input data is loaded, the layer is initialised and
     * then trained. If the SOM properties request intermediate dumps (<code>getDumpEvery() &gt; 0</code>), an
     * {@link IntermediateSOMDumper} is registered on the layer <em>before</em> training starts.
     * 
     * @param properties the properties from which the {@link FileProperties} and {@link SOMProperties} are built.
     * @throws PropertiesException if the file or SOM properties cannot be created from <code>properties</code>.
     */
    public GrowingSOM(Properties properties) throws PropertiesException {
        FileProperties fileProps = new FileProperties(properties);
        SOMProperties somProps = new SOMProperties(properties);
        InputData data = getInputData(fileProps);

        // setting input data so it is accessible by map output
        setSharedInputObjects(new SharedSOMVisualisationData(null, null, null, null, fileProps.vectorFileName(true),
                fileProps.templateFileName(true), null));
        getSharedInputObjects().setData(SOMVisualisationData.INPUT_VECTOR, data);

        initLayer(data.isNormalizedToUnitLength(), somProps, data);

        // Register the dumper first: attaching it only after train(...) would miss every iteration of this
        // training run. This mirrors the order used in main(String[]).
        if (somProps.getDumpEvery() > 0) {
            IntermediateSOMDumper dumper = new IntermediateSOMDumper(fileProps);
            layer.setTrainingInterruptionListener(dumper, somProps.getDumpEvery());
        }

        train(data, somProps);
    }

    /**
     * Constructs a new <code>GrowingSOM</code> by initialising its layer from the given properties and data; no
     * training is performed here. Argument <code>norm</code> determines whether the randomly initialised weight
     * vectors should be normalised to unit length or not. The layer type is chosen from the grid topology in
     * <code>props</code>.
     * 
     * @param norm specifies if the weight vectors are to be normalised to unit length.
     * @param props the network properties.
     * @param data the input data; its <code>dim()</code> is passed to the layer as the vector dimensionality.
     */
    public GrowingSOM(boolean norm, SOMProperties props, InputData data) {
        initLayer(norm, props, data);
    }

    /**
     * Creates the {@link #layer} matching the grid topology configured in <code>props</code>: a
     * {@link GrowingLayer} for a planar grid, a {@link ToroidLayer} for a toroid grid.
     * 
     * @param norm passed through to the layer constructor (whether weight vectors are normalised to unit length).
     * @param props supplies map size, metric name, PCA flag, random seed and grid topology.
     * @param data the input data; also provides the vector dimensionality via <code>dim()</code>.
     * @throws NotImplementedException if the configured grid topology is neither planar nor toroid.
     */
    private void initLayer(boolean norm, SOMProperties props, InputData data) {
        final GridTopology topology = props.getGridTopology();
        if (topology == GridTopology.planar) {
            layer = new GrowingLayer(props.xSize(), props.ySize(), props.zSize(), props.metricName(), data.dim(),
                    norm, props.pca(), props.randomSeed(), data);
        } else if (topology == GridTopology.toroid) {
            layer = new ToroidLayer(props.xSize(), props.ySize(), props.zSize(), props.metricName(), data.dim(),
                    norm, props.pca(), props.randomSeed(), data);
        } else {
            throw new NotImplementedException("Support for grid topology " + topology + " not yet implemented.");
        }
    }

    /**
     * Constructs and trains a new <code>GrowingSOM</code>. All the non-specified parameters will be automatically set
     * to <i>"default"</i> values.
     */
    public GrowingSOM(int xSize, int ySize, int numIterations, InputData data) throws PropertiesException {
        SOMProperties props = new SOMProperties(xSize, ySize, numIterations, SOMProperties.defaultLearnRate);
        initLayer(false, props, data);
        train(data, props);
    }

    /**
     * Constructs and trains a new <code>GrowingSOM</code>.
     * 
     * @param xSize the number of units in x-direction.
     * @param ySize the number of units in y-direction.
     * @param zSize the number of units in z-direction.
     * @param metricName name of the distance metric. NOTE(review): this argument is currently not forwarded -
     *            <code>null</code> is passed to the {@link SOMProperties} constructor instead; confirm which
     *            constructor argument is the metric before wiring it through.
     * @param numIterations the number of training iterations.
     * @param normalised specifies if the weight vectors are to be normalised to unit length.
     * @param usePCAInit passed to the {@link SOMProperties} constructor as its last argument.
     * @param randomSeed the seed passed to the {@link SOMProperties} constructor.
     * @param data the input data to train the map with.
     * @throws PropertiesException if the properties cannot be created from the given values.
     */
    public GrowingSOM(int xSize, int ySize, int zSize, String metricName, int numIterations, boolean normalised,
            boolean usePCAInit, int randomSeed, InputData data) throws PropertiesException {
        SOMProperties props = new SOMProperties(xSize, ySize, zSize, randomSeed, 0, numIterations,
                SOMProperties.defaultLearnRate, -1, -1, null, usePCAInit);
        // honour the caller's normalisation choice (it used to be hard-coded to false)
        initLayer(normalised, props, data);
        train(data, props);
    }

    /**
     * Constructs a new <code>GrowingSOM</code> with <code>dim</code>-dimensional weight vectors. Argument
     * <code>norm</code> determines whether the randomly initialized weight vectors should be normalized to unit length
     * or not. In hierarchical network models consisting of multiple maps such as the {@link GHSOM}, a unique identifier
     * is assigned by argument <code>id</code> and the superordinate unit is provided by argument <code>su</code>.
     * <p>
     * This constructor always creates a plain {@link GrowingLayer}; unlike the other layer-initialising constructors
     * it does not look at the grid topology in <code>props</code>. No training is performed here.
     * 
     * @param id a unique identifier used in hierarchies of maps (e.g. the <code>GHSOM</code>).
     * @param su the superordinate unit of the map.
     * @param dim the dimensionality of the weight vectors.
     * @param norm specifies if the weight vectors are to be normalized to unit length.
     * @param props the network properties.
     * @param data the input data, passed on to the layer constructor.
     */
    public GrowingSOM(int id, Unit su, int dim, boolean norm, SOMProperties props, InputData data) {
        layer = new GrowingLayer(id, su, props.xSize(), props.ySize(), props.zSize(), props.metricName(), dim, norm,
                props.pca(), props.randomSeed(), data);
    }

    /**
     * Private constructor used recursively in hierarchical network models consisting of multiple maps. A unique
     * identifier is assigned by argument <code>id</code> and the superordinate unit is provided by argument
     * <code>su</code>.
     * 
     * @param id a unique identifier used in hierarchies of maps (e.g. the <code>GHSOM</code>).
     * @param su the superordinate unit of the map.
     * @param ir an object implementing the <code>SOMinputReader</code> interface to load an already trained model.
     */
    protected GrowingSOM(int id, Unit su, SOMInputReader ir) {
        Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Starting layer restoration.");

        // FIXME: the initialisation of the layer should actually be done in the layer class itself
        try {
            // TODO: think about rand seed (7), use map description file when provided
            layer = new GrowingLayer(id, su, ir.getXSize(), ir.getYSize(), ir.getZSize(), ir.getMetricName(),
                    ir.getDim(), ir.getVectors(), 7);
        } catch (SOMToolboxException e) {
            Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage());
            System.exit(-1);
        }
        labelled = ir.isLabelled();

        restoreLayer(id, ir, layer);
    }

    /**
     * Restores an already trained map into a layer supplied by the caller instead of creating one from the reader.
     * 
     * @param id a unique identifier used in hierarchies of maps; forwarded to the layer restoration.
     * @param su the superordinate unit of the map (not used by this constructor).
     * @param ir the reader providing the stored state of the trained model.
     * @param layer the pre-built layer whose units receive the restored state.
     */
    protected GrowingSOM(int id, Unit su, SOMInputReader ir, GrowingLayer layer) {
        this.labelled = ir.isLabelled();
        this.layer = layer;
        restoreLayer(id, ir, this.layer);
    }

    /**
     * Restores the stored state of a trained map from <code>ir</code> into <code>layer</code>: grid layout and
     * topology, then for every non-empty unit its mapped vectors, labels, Kaski labels and Kaski gate labels. Units
     * that have sub-maps mapped onto them get a recursively restored <code>GrowingSOM</code> attached. Finally the
     * quality measure and the common vector label prefix are set on the layer.
     * <p>
     * Only units at z-index 0 are visited. A {@link LayerAccessException}, or any exception while restoring a
     * sub-map, is logged and terminates the JVM with exit code -1.
     * 
     * @param id identifier of this map; incremented locally for every sub-map that is restored.
     * @param ir the reader providing the stored state.
     * @param layer the layer to restore the state into.
     */
    private void restoreLayer(int id, SOMInputReader ir, GrowingLayer layer) {
        layer.setGridLayout(ir.getGridLayout());
        layer.setGridTopology(ir.getGridTopology());

        // the z-dimension is not counted here, matching the loops below that only visit z == 0
        int numUnits = layer.getXSize() * layer.getYSize();
        int currentUnitNum = 0;

        Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Restoring state of " + numUnits + " units: ");

        StdErrProgressWriter progressWriter = new StdErrProgressWriter(numUnits, "Restoring state of unit ", 10);
        try {
            // iterate row by row (y outer, x inner)
            for (int j = 0; j < layer.getYSize(); j++) {
                for (int i = 0; i < layer.getXSize(); i++) {
                    // adapted to mnemonic (sparse) SOMs
                    if (layer.getUnit(i, j, 0) == null) { // if this unit is empty, i.e. not part of the mnemonic map
                        // --> we skip it
                        progressWriter.progress("Skipping empty unit " + i + "/" + j + ", ", (currentUnitNum + 1));
                    } else { // otherwise we read this unit
                        progressWriter.progress("Restoring state of unit " + i + "/" + j + ", ",
                                (currentUnitNum + 1));
                        layer.getUnit(i, j, 0).restoreMappings(ir.getNrVecMapped(i, j), ir.getMappedVecs(i, j),
                                ir.getMappedVecsDist(i, j));
                        layer.getUnit(i, j, 0).restoreLabels(ir.getNrUnitLabels(i, j), ir.getUnitLabels(i, j),
                                ir.getUnitLabelsQe(i, j), ir.getUnitLabelsWgt(i, j));
                        layer.getUnit(i, j, 0).restoreKaskiLabels(ir.getNrKaskiLabels(i, j),
                                ir.getKaskiUnitLabels(i, j), ir.getKaskiUnitLabelsWgt(i, j));
                        layer.getUnit(i, j, 0).restoreKaskiGateLabels(ir.getNrKaskiGateLabels(i, j),
                                ir.getKaskiGateUnitLabels(i, j, 0));
                        if (ir.getNrSomsMapped(i, j) > 0) { // if expanded then create new growingsom
                            // Each sub-map file name is only built when the parent reader had the corresponding
                            // file; otherwise null is passed on. Only the first mapped SOM (index 0) is restored.
                            String subWeightFileName = null;
                            if (ir.getWeightVectorFileName() != null) {
                                subWeightFileName = ir.getFilePath() + ir.getUrlMappedSoms(i, j)[0]
                                        + SOMLibFormatInputReader.weightFileNameSuffix;
                            }
                            String subUnitFileName = null;
                            if (ir.getUnitDescriptionFileName() != null) {
                                subUnitFileName = ir.getFilePath() + ir.getUrlMappedSoms(i, j)[0]
                                        + SOMLibFormatInputReader.unitFileNameSuffix;
                            }
                            String subMapFileName = null;
                            if (ir.getMapDescriptionFileName() != null) {
                                subMapFileName = ir.getFilePath() + ir.getUrlMappedSoms(i, j)[0]
                                        + SOMLibFormatInputReader.mapFileNameSuffix;
                            }
                            try {
                                // NOTE(review): ++id only changes this invocation's local copy, so ids assigned
                                // inside a recursively restored sub-map are not visible here - confirm that ids
                                // stay unique across deeper hierarchies.
                                layer.getUnit(i, j, 0)
                                        .setMappedSOM(new GrowingSOM(++id, layer.getUnit(i, j, 0),
                                                new SOMLibFormatInputReader(subWeightFileName, subUnitFileName,
                                                        subMapFileName)));
                            } catch (Exception e) {
                                Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage() + " Aborting.");
                                System.exit(-1);
                            }
                        }
                    }
                    // counted for skipped units as well, so the progress display covers the whole grid
                    currentUnitNum++;
                }
            }
            // TODO FIXME : pass the quality measure as parameter!
            String qualityMeasureName = "at.tuwien.ifs.somtoolbox.layers.quality.QuantizationError.mqe";
            layer.setQualityMeasure(qualityMeasureName);
            layer.setCommonVectorLabelPrefix(ir.getCommonVectorLabelPrefix());
        } catch (LayerAccessException e) {
            Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage());
            System.exit(-1);
        }
        Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Finished layer restoration.");
        // layer.calculateQuantizationErrorAfterTraining(); is done by the unit.
    }

    /**
     * Constructs an already trained <code>GrowingSOM</code> with a <code>SOMInputReader</code> provided by argument
     * <code>ir</code>. The map is restored as a top-level map: it gets the identifier <code>1</code> and no
     * superordinate unit (<code>null</code>).
     * 
     * @param ir an object implementing the <code>SOMinputReader</code> interface to load an already trained model.
     */
    public GrowingSOM(SOMInputReader ir) {
        this(1, null, ir);
    }

    /**
     * Creates an empty instance without a layer ({@link #layer} stays <code>null</code>). Intended for subclasses;
     * it is also used by {@link #clone()}, which fills in the fields afterwards.
     */
    protected GrowingSOM() {
    }

    /**
     * Returns the actual map layer.
     * 
     * @return the actual map layer, or <code>null</code> if no layer has been assigned yet (e.g. directly after the
     *         protected no-argument constructor).
     */
    public GrowingLayer getLayer() {
        return layer;
    }

    /**
     * Trains the map with the input data and training parameters specified in the properties provided by argument
     * <code>props</code>. If the value of property <code>tau</code> is 1, a fix-sized layer is trained, otherwise the
     * layer grows until a certain quality criterion determined by <code>tau</code> and the target quality value is
     * reached. This method is usually used for GHSOM training.
     * <p>
     * The number of iterations is <code>props.numIterations()</code> if that is positive; otherwise it is
     * <code>props.numCycles()</code> times the number of input vectors.
     * 
     * @param data input data to train the map with.
     * @param props the training properties.
     * @param targetQualityValue the desired granularity of data representation. Used for maps in GHSOMs.
     * @param qualityMeasureName name of the quality measure, passed on to the layer's training method.
     * @return the quality measure returned by the layer's training method.
     */
    public QualityMeasure train(InputData data, GHSOMProperties props, double targetQualityValue,
            String qualityMeasureName) {
        // an explicit iteration count wins; otherwise derive it from the number of cycles over the data
        final int configuredIterations = props.numIterations();
        final int iterationsToTrain = configuredIterations > 0 ? configuredIterations
                : props.numCycles() * data.numVectors();

        return layer.train(data, props.learnrate(), props.sigma(), iterationsToTrain, 0, props.tau(),
                targetQualityValue, qualityMeasureName, props);
    }

    /**
     * Trains the map with the input data and training parameters specified in the properties provided by argument
     * <code>props</code>. If the value of property <code>tau</code> is 1, a fix-sized layer is trained, otherwise the
     * layer grows until a certain quality criterion determined by <code>tau</code> and the mean quantization error of
     * the data (which is automatically calculated) is reached.
     * <p>
     * The number of iterations is <code>props.numIterations()</code> if that is positive; otherwise it is
     * <code>props.numCycles()</code> times the number of input vectors.
     * 
     * @param data input data to train the map with.
     * @param props the training properties
     */
    public void train(InputData data, SOMProperties props) {
        // an explicit iteration count wins; otherwise derive it from the number of cycles over the data
        final int configuredIterations = props.numIterations();
        final int iterationsToTrain = configuredIterations > 0 ? configuredIterations
                : props.numCycles() * data.numVectors();

        layer.train(data, props.learnrate(), props.sigma(), iterationsToTrain, 0, props.tau(),
                props.growthQualityMeasureName(), props);
    }

    /**
     * Two <code>GrowingSOM</code>s are considered equal when their layers have equal weights, as decided by
     * <code>GrowingLayer.equalWeights</code>. Any object that is not a <code>GrowingSOM</code> is never equal.
     * <p>
     * NOTE(review): no matching <code>hashCode()</code> override is visible in this class - confirm whether a
     * superclass provides one that is consistent with this method.
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof GrowingSOM)) {
            // covers null and all foreign types
            return false;
        }
        final GrowingSOM other = (GrowingSOM) o;
        return getLayer().equalWeights(other.getLayer());
    }

    /**
     * Training listener that writes the weight vectors of the enclosing <code>GrowingSOM</code> to a file each time
     * it is notified. The file name is the configured name prefix followed by <code>_</code> and the current
     * iteration. A failed write is logged and otherwise ignored, so training continues.
     */
    private class IntermediateSOMDumper implements TrainingInterruptionListener {

        /** Supplies the output directory and the name prefix of the dump files. */
        private final FileProperties fileProperties;

        public IntermediateSOMDumper(FileProperties fileProperties) {
            this.fileProperties = fileProperties;
        }

        @Override
        public void interruptionOccurred(int currentIteration, int numIterations) {
            // FIXME: maybe skip writing the SOM at 0 iterations (0 mod x == 0 ...)
            final String dumpName = fileProperties.namePrefix(false) + "_" + currentIteration;
            final String currentIterationEntry = "$CURRENT_ITERATION=" + currentIteration;
            final String numIterationsEntry = "$NUM_ITERATIONS=" + numIterations;
            try {
                SOMLibMapOutputter.writeWeightVectorFile(GrowingSOM.this, fileProperties.outputDirectory(),
                        dumpName, true, currentIterationEntry, numIterationsEntry);
            } catch (IOException e) {
                final Logger log = Logger.getLogger("at.tuwien.ifs.somtoolbox");
                log.severe("Could not open or write to output file " + dumpName + ": " + e.getMessage());
            }
        }

    }

    /**
     * Creates a copy of this map. The layer is duplicated via its own <code>clone()</code>; the shared input
     * objects are <em>not</em> copied - the copy refers to the same instance. The result is always a plain
     * <code>GrowingSOM</code>, as it is built with the no-argument constructor.
     * 
     * @see java.lang.Object#clone()
     */
    @Override
    public Object clone() throws CloneNotSupportedException {
        final GrowingSOM copy = new GrowingSOM();
        copy.layer = (GrowingLayer) layer.clone();
        copy.labelled = labelled;
        copy.trainingStart = trainingStart;
        copy.sharedInputObjects = sharedInputObjects;
        return copy;
    }

}