Java tutorial
/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.models; import java.io.IOException; import java.util.Properties; import java.util.logging.Logger; import org.apache.commons.lang.NotImplementedException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import at.tuwien.ifs.somtoolbox.SOMToolboxException; import at.tuwien.ifs.somtoolbox.apps.SOMToolboxApp; import at.tuwien.ifs.somtoolbox.apps.config.AbstractOptionFactory; import at.tuwien.ifs.somtoolbox.apps.config.OptionFactory; import at.tuwien.ifs.somtoolbox.data.InputData; import at.tuwien.ifs.somtoolbox.data.SOMVisualisationData; import at.tuwien.ifs.somtoolbox.data.SharedSOMVisualisationData; import at.tuwien.ifs.somtoolbox.input.SOMInputReader; import at.tuwien.ifs.somtoolbox.input.SOMLibDataWinnerMapping; import at.tuwien.ifs.somtoolbox.input.SOMLibFormatInputReader; import at.tuwien.ifs.somtoolbox.layers.GrowingLayer; import at.tuwien.ifs.somtoolbox.layers.Layer.GridTopology; import at.tuwien.ifs.somtoolbox.layers.LayerAccessException; import at.tuwien.ifs.somtoolbox.layers.ToroidLayer; import at.tuwien.ifs.somtoolbox.layers.TrainingInterruptionListener; import at.tuwien.ifs.somtoolbox.layers.Unit; import at.tuwien.ifs.somtoolbox.layers.quality.QualityMeasure; import at.tuwien.ifs.somtoolbox.output.HTMLOutputter; import at.tuwien.ifs.somtoolbox.output.SOMLibMapOutputter; import at.tuwien.ifs.somtoolbox.output.labeling.AbstractLabeler; import at.tuwien.ifs.somtoolbox.output.labeling.Labeler; import at.tuwien.ifs.somtoolbox.properties.FileProperties; import at.tuwien.ifs.somtoolbox.properties.GHSOMProperties; import at.tuwien.ifs.somtoolbox.properties.PropertiesException; import at.tuwien.ifs.somtoolbox.properties.SOMProperties; import at.tuwien.ifs.somtoolbox.util.StdErrProgressWriter; /** * This class implements the Growing Self-Organizing Map. It is basically a wrapper for the * {@link at.tuwien.ifs.somtoolbox.layers.GrowingLayer} and mainly handles command line execution and parameters. It * implements the {@link at.tuwien.ifs.somtoolbox.models.NetworkModel} interface wich is currently not used, but may be * used in the future. * * @author Michael Dittenbach * @version $Id: GrowingSOM.java 3883 2010-11-02 17:13:23Z frank $ */ public class GrowingSOM extends AbstractNetworkModel implements SOMToolboxApp { public static final Parameter[] OPTIONS = new Parameter[] { OptionFactory.getSwitchHtmlOutput(false), OptionFactory.getOptLabeling(false), OptionFactory.getOptNumberLabels(false), OptionFactory.getOptWeightVectorFileInit(false), OptionFactory.getOptMapDescriptionFile(false), OptionFactory.getSwitchSkipDataWinnerMapping(), OptionFactory.getOptNumberWinners(false), OptionFactory.getOptProperties(true), OptionFactory.getOptUseMultiCPU(false) }; public static final Type APPLICATION_TYPE = Type.Training; public static String DESCRIPTION = "Provides a Growing Grid and a static SOM (with the growth parameters disabled)"; // TODO: Long_Description public static String LONG_DESCRIPTION = "Provides a Growing Grid (Bernd Fritzke, 1995), i.e. a Self-Organising Map that can dynamically grow by inserting whole rows or cells. When setting the growth-controlling parameters to disable growth, it can also be used to train a standard SOM."; /** * Method for stand-alone execution of map training. Options are:<br/> * <ul> * <li>-h toggles HTML output</li> * <li>-l name of class implementing the labeling algorithm</li> * <li>-n number of labels to generate</li> * <li>-w name of weight vector file in case of training an already trained map</li> * <li>-m name of map description file in case of training an already trained map</li> * <li>--noDWM switch to not write the data winner mapping file</li> * <li>properties name of properties file, mandatory</li> * </ul> * * @param args the execution arguments as stated above. */ public static void main(String[] args) { InputData data = null; FileProperties fileProps = null; GrowingSOM som = null; SOMProperties somProps = null; String networkModelName = "GrowingSOM"; // register and parse all options JSAPResult config = OptionFactory.parseResults(args, OPTIONS); Logger.getLogger("at.tuwien.ifs.somtoolbox").info("starting" + networkModelName); int cpus = config.getInt("cpus", 1); int systemCPUs = Runtime.getRuntime().availableProcessors(); // We do not use more CPUs than available! if (cpus > systemCPUs) { String msg = "Number of CPUs required exceeds number of CPUs available."; if (cpus > 2 * systemCPUs) { msg += "Limiting to twice the number of available processors: " + 2 * systemCPUs; cpus = 2 * systemCPUs; } Logger.getLogger("at.tuwien.ifs.somtoolbox").warning(msg); } GrowingLayer.setNO_CPUS(cpus); String propFileName = AbstractOptionFactory.getFilePath(config, "properties"); String weightFileName = AbstractOptionFactory.getFilePath(config, "weightVectorFile"); String mapDescFileName = AbstractOptionFactory.getFilePath(config, "mapDescriptionFile"); String labelerName = config.getString("labeling", null); int numLabels = config.getInt("numberLabels", DEFAULT_LABEL_COUNT); boolean skipDataWinnerMapping = config.getBoolean("skipDataWinnerMapping", false); Labeler labeler = null; // TODO: use parameter for max int numWinners = config.getInt("numberWinners", SOMLibDataWinnerMapping.MAX_DATA_WINNERS); if (labelerName != null) { // if labeling then label try { labeler = AbstractLabeler.instantiate(labelerName); Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Instantiated labeler " + labelerName); } catch (Exception e) { Logger.getLogger("at.tuwien.ifs.somtoolbox") .severe("Could not instantiate labeler \"" + labelerName + "\"."); System.exit(-1); } } if (weightFileName == null) { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Training a new SOM."); } else { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Further training of an already trained SOM."); } try { fileProps = new FileProperties(propFileName); somProps = new SOMProperties(propFileName); } catch (PropertiesException e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage() + " Aborting."); System.exit(-1); } data = getInputData(fileProps); if (weightFileName == null) { som = new GrowingSOM(data.isNormalizedToUnitLength(), somProps, data); } else { try { som = new GrowingSOM(new SOMLibFormatInputReader(weightFileName, null, mapDescFileName)); } catch (Exception e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage() + " Aborting."); System.exit(-1); } } if (somProps.getDumpEvery() > 0) { IntermediateSOMDumper dumper = som.new IntermediateSOMDumper(fileProps); som.layer.setTrainingInterruptionListener(dumper, somProps.getDumpEvery()); } // setting input data so it is accessible by map output som.setSharedInputObjects(new SharedSOMVisualisationData(null, null, null, null, fileProps.vectorFileName(true), fileProps.templateFileName(true), null)); som.getSharedInputObjects().setData(SOMVisualisationData.INPUT_VECTOR, data); som.train(data, somProps); if (labelerName != null) { // if labeling then label labeler.label(som, data, numLabels); } try { SOMLibMapOutputter.write(som, fileProps.outputDirectory(), fileProps.namePrefix(false), true, somProps, fileProps); } catch (IOException e) { // TODO: create new exception type Logger.getLogger("at.tuwien.ifs.somtoolbox").severe("Could not open or write to output file " + fileProps.namePrefix(false) + ": " + e.getMessage()); System.exit(-1); } if (!skipDataWinnerMapping) { numWinners = Math.min(numWinners, som.getLayer().getXSize() * som.getLayer().getYSize()); try { SOMLibMapOutputter.writeDataWinnerMappingFile(som, data, numWinners, fileProps.outputDirectory(), fileProps.namePrefix(false), true); } catch (IOException e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe("Could not open or write to output file " + fileProps.namePrefix(false) + ": " + e.getMessage()); System.exit(-1); } } else { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Skipping writing data winner mapping file"); } if (config.getBoolean("htmlOutput") == true) { try { new HTMLOutputter().write(som, fileProps.outputDirectory(), fileProps.namePrefix(false)); } catch (IOException e) { // TODO: create new exception type Logger.getLogger("at.tuwien.ifs.somtoolbox").severe("Could not open or write to output file " + fileProps.namePrefix(false) + ": " + e.getMessage()); System.exit(-1); } } Logger.getLogger("at.tuwien.ifs.somtoolbox").info("finished" + networkModelName + "(" + som.getLayer().getGridLayout() + ", " + som.getLayer().getGridTopology() + ")"); } protected GrowingLayer layer = null; public GrowingSOM(Properties properties) throws PropertiesException { FileProperties fileProps = new FileProperties(properties); SOMProperties somProps = new SOMProperties(properties); InputData data = getInputData(fileProps); // setting input data so it is accessible by map output setSharedInputObjects(new SharedSOMVisualisationData(null, null, null, null, fileProps.vectorFileName(true), fileProps.templateFileName(true), null)); getSharedInputObjects().setData(SOMVisualisationData.INPUT_VECTOR, data); initLayer(data.isNormalizedToUnitLength(), somProps, data); train(data, somProps); if (somProps.getDumpEvery() > 0) { IntermediateSOMDumper dumper = new IntermediateSOMDumper(fileProps); layer.setTrainingInterruptionListener(dumper, somProps.getDumpEvery()); } } /** * Constructs a new <code>GrowingSOM</code> with <code>dim</code>-dimensional weight vectors. Argument * <code>norm</code> determines whether the randomly initialised weight vectors should be normalised to unit length * or not. * * @param norm specifies if the weight vectors are to be normalised to unit length. * @param props the network properties. */ public GrowingSOM(boolean norm, SOMProperties props, InputData data) { initLayer(norm, props, data); } private void initLayer(boolean norm, SOMProperties props, InputData data) { if (props.getGridTopology() == GridTopology.planar) { layer = new GrowingLayer(props.xSize(), props.ySize(), props.zSize(), props.metricName(), data.dim(), norm, props.pca(), props.randomSeed(), data); } else if (props.getGridTopology() == GridTopology.toroid) { layer = new ToroidLayer(props.xSize(), props.ySize(), props.zSize(), props.metricName(), data.dim(), norm, props.pca(), props.randomSeed(), data); } else { throw new NotImplementedException( "Supported for grid topology " + props.getGridTopology() + " not yet implemented."); } } /** * Constructs and trains a new <code>GrowingSOM</code>. All the non-specified parameters will be automatically set * to <i>"default"</i> values. */ public GrowingSOM(int xSize, int ySize, int numIterations, InputData data) throws PropertiesException { SOMProperties props = new SOMProperties(xSize, ySize, numIterations, SOMProperties.defaultLearnRate); initLayer(false, props, data); train(data, props); } /** Constructs and trains a new <code>GrowingSOM</code>. */ public GrowingSOM(int xSize, int ySize, int zSize, String metricName, int numIterations, boolean normalised, boolean usePCAInit, int randomSeed, InputData data) throws PropertiesException { SOMProperties props = new SOMProperties(xSize, ySize, zSize, randomSeed, 0, numIterations, SOMProperties.defaultLearnRate, -1, -1, null, usePCAInit); initLayer(false, props, data); train(data, props); } /** * Constructs a new <code>GrowingSOM</code> with <code>dim</code>-dimensional weight vectors. Argument * <code>norm</code> determines whether the randlomy initialized weight vectors should be normalized to unit length * or not. In hierarchical network models consisting of multiple maps such as the {@link GHSOM}, a unique identifier * is assigned by argument <code>id</code> and the superordinate unit is provided by argument <code>su</code>. * * @param id a unique identifier used in hierarchies of maps (e.g. the <code>GHSOM</code>). * @param su the superordinate unit of the map. * @param dim the dimensionality of the weight vectors. * @param norm specifies if the weight vectors are to be normalized to unit length. * @param props the network properties. */ public GrowingSOM(int id, Unit su, int dim, boolean norm, SOMProperties props, InputData data) { layer = new GrowingLayer(id, su, props.xSize(), props.ySize(), props.zSize(), props.metricName(), dim, norm, props.pca(), props.randomSeed(), data); } /** * Private constructor used recursively in hierarchical network models consisting of multiple maps. A unique * identifier is assigned by argument <code>id</code> and the superordinate unit is provided by argument * <code>su</code>. * * @param id a unique identifier used in hierarchies of maps (e.g. the <code>GHSOM</code>). * @param su the superordinate unit of the map. * @param ir an object implementing the <code>SOMinputReader</code> interface to load an already trained model. */ protected GrowingSOM(int id, Unit su, SOMInputReader ir) { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Starting layer restoration."); // FIXME: the initialisation of the layer should actually be done in the layer class itself try { // TODO: think about rand seed (7), use map description file when provided layer = new GrowingLayer(id, su, ir.getXSize(), ir.getYSize(), ir.getZSize(), ir.getMetricName(), ir.getDim(), ir.getVectors(), 7); } catch (SOMToolboxException e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage()); System.exit(-1); } labelled = ir.isLabelled(); restoreLayer(id, ir, layer); } protected GrowingSOM(int id, Unit su, SOMInputReader ir, GrowingLayer layer) { this.layer = layer; labelled = ir.isLabelled(); restoreLayer(id, ir, layer); } private void restoreLayer(int id, SOMInputReader ir, GrowingLayer layer) { layer.setGridLayout(ir.getGridLayout()); layer.setGridTopology(ir.getGridTopology()); int numUnits = layer.getXSize() * layer.getYSize(); int currentUnitNum = 0; Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Restoring state of " + numUnits + " units: "); StdErrProgressWriter progressWriter = new StdErrProgressWriter(numUnits, "Restoring state of unit ", 10); try { for (int j = 0; j < layer.getYSize(); j++) { for (int i = 0; i < layer.getXSize(); i++) { // adapted to mnemonic (sparse) SOMs if (layer.getUnit(i, j, 0) == null) { // if this unit is empty, i.e. not part of the mnemonic map // --> we skip it progressWriter.progress("Skipping empty unit " + i + "/" + j + ", ", (currentUnitNum + 1)); } else { // otherwise we read this unit progressWriter.progress("Restoring state of unit " + i + "/" + j + ", ", (currentUnitNum + 1)); layer.getUnit(i, j, 0).restoreMappings(ir.getNrVecMapped(i, j), ir.getMappedVecs(i, j), ir.getMappedVecsDist(i, j)); layer.getUnit(i, j, 0).restoreLabels(ir.getNrUnitLabels(i, j), ir.getUnitLabels(i, j), ir.getUnitLabelsQe(i, j), ir.getUnitLabelsWgt(i, j)); layer.getUnit(i, j, 0).restoreKaskiLabels(ir.getNrKaskiLabels(i, j), ir.getKaskiUnitLabels(i, j), ir.getKaskiUnitLabelsWgt(i, j)); layer.getUnit(i, j, 0).restoreKaskiGateLabels(ir.getNrKaskiGateLabels(i, j), ir.getKaskiGateUnitLabels(i, j, 0)); if (ir.getNrSomsMapped(i, j) > 0) { // if expanded then create new growingsom String subWeightFileName = null; if (ir.getWeightVectorFileName() != null) { subWeightFileName = ir.getFilePath() + ir.getUrlMappedSoms(i, j)[0] + SOMLibFormatInputReader.weightFileNameSuffix; } String subUnitFileName = null; if (ir.getUnitDescriptionFileName() != null) { subUnitFileName = ir.getFilePath() + ir.getUrlMappedSoms(i, j)[0] + SOMLibFormatInputReader.unitFileNameSuffix; } String subMapFileName = null; if (ir.getMapDescriptionFileName() != null) { subMapFileName = ir.getFilePath() + ir.getUrlMappedSoms(i, j)[0] + SOMLibFormatInputReader.mapFileNameSuffix; } try { layer.getUnit(i, j, 0) .setMappedSOM(new GrowingSOM(++id, layer.getUnit(i, j, 0), new SOMLibFormatInputReader(subWeightFileName, subUnitFileName, subMapFileName))); } catch (Exception e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage() + " Aborting."); System.exit(-1); } } } currentUnitNum++; } } // TODO FIXME : pass the quality measure as parameter! String qualityMeasureName = "at.tuwien.ifs.somtoolbox.layers.quality.QuantizationError.mqe"; layer.setQualityMeasure(qualityMeasureName); layer.setCommonVectorLabelPrefix(ir.getCommonVectorLabelPrefix()); } catch (LayerAccessException e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage()); System.exit(-1); } Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Finished layer restoration."); // layer.calculateQuantizationErrorAfterTraining(); is done by the unit. } /** * Constructs an already trained <code>GrowingSOM</code> with a <code>SOMInputReader</code> provided by argument * <code>ir</code>. * * @param ir an object implementing the <code>SOMinputReader</code> interface to load an already trained model. */ public GrowingSOM(SOMInputReader ir) { this(1, null, ir); } /** only used for subclassing */ protected GrowingSOM() { } /** * Returns the actual map layer. * * @return the actual map layer */ public GrowingLayer getLayer() { return layer; } /** * Trains the map with the input data and training parameters specified in the properties provided by argument * <code>props</code>. If the value of property <code>tau</code> is 1, a fix-sized layer is trained, otherwise the * layer grows until a certain quality criterion determined by <code>tau</code> and the mean quantization error * specified by argument <code>mqe0</code> of the data (which is automatically calculated) is reached. This method * is usually used for GHSOM training. * * @param data input data to train the map with. * @param props the training properties. * @param targetQualityValue the desired granularity of data representation. Used for maps in GHSOMs. */ public QualityMeasure train(InputData data, GHSOMProperties props, double targetQualityValue, String qualityMeasureName) { // call training function depending on the properties (iterations || cycles) int iterationsToTrain = 0; if (props.numIterations() > 0) { iterationsToTrain = props.numIterations(); } else { iterationsToTrain = props.numCycles() * data.numVectors(); } return layer.train(data, props.learnrate(), props.sigma(), iterationsToTrain, 0, props.tau(), targetQualityValue, qualityMeasureName, props); } /** * Trains the map with the input data and training parameters specified in the properties provided by argument * <code>props</code>. If the value of property <code>tau</code> is 1, a fix-sized layer is trained, otherwise the * layer grows until a certain quality criterion determined by <code>tau</code> and the mean quantization error of * the data (which is automatically calculated) is reached. * * @param data input data to train the map with. * @param props the training properties */ public void train(InputData data, SOMProperties props) { // call training function depending on the properties (iterations || cycles) int iterationsToTrain = 0; if (props.numIterations() > 0) { iterationsToTrain = props.numIterations(); } else { iterationsToTrain = props.numCycles() * data.numVectors(); } layer.train(data, props.learnrate(), props.sigma(), iterationsToTrain, 0, props.tau(), props.growthQualityMeasureName(), props); } @Override public boolean equals(Object o) { if (o instanceof GrowingSOM) { // compare the layers for equality return getLayer().equalWeights(((GrowingSOM) o).getLayer()); } // false in all other cases... return false; } private class IntermediateSOMDumper implements TrainingInterruptionListener { private final FileProperties fileProperties; public IntermediateSOMDumper(FileProperties fileProperties) { this.fileProperties = fileProperties; } @Override public void interruptionOccurred(int currentIteration, int numIterations) { // FIXME: maybe skip writing the SOM at 0 iterations (0 mod x == 0 ...) String filename = fileProperties.namePrefix(false) + "_" + currentIteration; try { SOMLibMapOutputter.writeWeightVectorFile(GrowingSOM.this, fileProperties.outputDirectory(), filename, true, "$CURRENT_ITERATION=" + currentIteration, "$NUM_ITERATIONS=" + numIterations); } catch (IOException e) { Logger.getLogger("at.tuwien.ifs.somtoolbox") .severe("Could not open or write to output file " + filename + ": " + e.getMessage()); } } } /* (non-Javadoc) * @see java.lang.Object#clone() */ @Override public Object clone() throws CloneNotSupportedException { GrowingSOM newGSOM = new GrowingSOM(); newGSOM.labelled = this.labelled; newGSOM.layer = (GrowingLayer) this.layer.clone(); newGSOM.sharedInputObjects = this.sharedInputObjects; newGSOM.trainingStart = this.trainingStart; return newGSOM; } }