at.tuwien.ifs.somtoolbox.apps.helper.DataMapper.java Source code

Java tutorial

Introduction

Here is the source code for at.tuwien.ifs.somtoolbox.apps.helper.DataMapper.java

Source

/*
 * Copyright 2004-2010 Information & Software Engineering Group (188/1)
 *                     Institute of Software Technology and Interactive Systems
 *                     Vienna University of Technology, Austria
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.tuwien.ifs.somtoolbox.apps.helper;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Logger;

import org.apache.commons.lang.StringUtils;

import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;

import at.tuwien.ifs.somtoolbox.SOMToolboxException;
import at.tuwien.ifs.somtoolbox.apps.SOMToolboxApp;
import at.tuwien.ifs.somtoolbox.apps.config.AbstractOptionFactory;
import at.tuwien.ifs.somtoolbox.apps.config.OptionFactory;
import at.tuwien.ifs.somtoolbox.data.InputData;
import at.tuwien.ifs.somtoolbox.data.InputDatum;
import at.tuwien.ifs.somtoolbox.data.SOMLibClassInformation;
import at.tuwien.ifs.somtoolbox.data.SOMLibSparseInputData;
import at.tuwien.ifs.somtoolbox.input.SOMLibFileFormatException;
import at.tuwien.ifs.somtoolbox.input.SOMLibFormatInputReader;
import at.tuwien.ifs.somtoolbox.layers.GrowingLayer;
import at.tuwien.ifs.somtoolbox.layers.Unit;
import at.tuwien.ifs.somtoolbox.layers.metrics.L2Metric;
import at.tuwien.ifs.somtoolbox.models.AbstractNetworkModel;
import at.tuwien.ifs.somtoolbox.models.GrowingSOM;
import at.tuwien.ifs.somtoolbox.output.SOMLibMapOutputter;
import at.tuwien.ifs.somtoolbox.output.labeling.AbstractLabeler;
import at.tuwien.ifs.somtoolbox.output.labeling.Labeler;
import at.tuwien.ifs.somtoolbox.util.FileUtils;
import at.tuwien.ifs.somtoolbox.util.StdErrProgressWriter;

/**
 * Maps inputs to an already trained SOM.
 * 
 * @author Angela Roiger
 * @author Rudolf Mayer
 * @version $Id: DataMapper.java 3987 2011-01-10 15:23:49Z mayer $
 */
public class DataMapper implements SOMToolboxApp {

    public static final Parameter[] OPTIONS = new Parameter[] { OptionFactory.getOptWeightVectorFile(true),
            OptionFactory.getOptMapDescriptionFile(false), OptionFactory.getOptInputVectorFile(true),
            OptionFactory.getOptUnitDescriptionFile(false), OptionFactory.getOptClassInformationFile(false),
            OptionFactory.getOptClasslist(false), OptionFactory.getOptLabeling(false),
            OptionFactory.getOptNumberLabels(false), OptionFactory.getOptNumberWinners(false),
            OptionFactory.getSwitchSkipDataWinnerMapping(), OptionFactory.getOptOutputFileName(false) };

    public static final String DESCRIPTION = "Maps inputs to an already trained SOM.";

    public static final String LONG_DESCRIPTION = DESCRIPTION.concat(
            " If a unit-file is given, the data items are added to the loaded map, without a unti file the mapping starts with an empty map.");

    public static final Type APPLICATION_TYPE = Type.Utils;

    public static void main(String[] args) throws FileNotFoundException, IOException, SOMToolboxException {
        new DataMapper(args);
    }

    public DataMapper(String[] args) throws FileNotFoundException, IOException, SOMToolboxException {
        // register and parse all options
        JSAPResult config = OptionFactory.parseResults(args, OPTIONS);

        String mapDescFileName = AbstractOptionFactory.getFilePath(config, "mapDescriptionFile");
        String weightVectorFileName = AbstractOptionFactory.getFilePath(config, "weightVectorFile");
        String unitDescriptionFileName = AbstractOptionFactory.getFilePath(config, "unitDescriptionFile");
        String classInformationFileName = AbstractOptionFactory.getFilePath(config, "classInformationFile");
        String outputPrefix = AbstractOptionFactory.getFilePath(config, "output");

        String skipClassesString = config.getString("classList");
        boolean skipDataWinnerMapping = config.getBoolean("skipDataWinnerMapping", false);
        int numDataWinners = config.getInt("numberWinners");
        String labelerName = config.getString("labeling", null);
        int numLabels = config.getInt("numberLabels", AbstractNetworkModel.DEFAULT_LABEL_COUNT);

        ArrayList<String> mappingExceptions = new ArrayList<String>();
        if (StringUtils.isNotBlank(skipClassesString)) {
            String[] tmp = skipClassesString.split(",");
            for (String element : tmp) {
                mappingExceptions.add(element);
            }
        }

        GrowingSOM som = null;
        /* restore SOM */
        try {
            som = new GrowingSOM(
                    new SOMLibFormatInputReader(weightVectorFileName, unitDescriptionFileName, mapDescFileName));
        } catch (Exception e) {
            Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage() + " Aborting.");
            e.printStackTrace();
            System.exit(-1);
        }

        SOMLibClassInformation classInfo = null;
        if (classInformationFileName != null) {
            try {
                classInfo = new SOMLibClassInformation(classInformationFileName);
            } catch (SOMToolboxException e1) {
                e1.printStackTrace();
            }
        }

        InputData data = new SOMLibSparseInputData(AbstractOptionFactory.getFilePath(config, "inputVectorFile"));
        // map the data
        mapCompleteDataAfterTraining(som, data, classInfo, mappingExceptions, labelerName, numLabels);
        // compute quality measure
        // TODO FIXME : pass the quality measure as parameter!
        String qualityMeasureName = "at.tuwien.ifs.somtoolbox.layers.quality.QuantizationError.mqe";
        som.getLayer().setQualityMeasure(qualityMeasureName);

        if (outputPrefix == null) {
            outputPrefix = FileUtils.extractSOMLibInputPrefix(FileUtils.stripPathPrefix(weightVectorFileName))
                    + ".remapped";
        }
        try {
            SOMLibMapOutputter.writeUnitDescriptionFile(som, "", outputPrefix, true);
        } catch (IOException e) { // TODO: create new exception type
            Logger.getLogger("at.tuwien.ifs.somtoolbox")
                    .severe("Could not open or write to output file " + outputPrefix + ": " + e.getMessage());
            System.exit(-1);
        }
        if (!skipDataWinnerMapping) {
            int numWinners = Math.min(numDataWinners, som.getLayer().getXSize() * som.getLayer().getYSize());
            try {
                SOMLibMapOutputter.writeDataWinnerMappingFile(som, data, numWinners, "", outputPrefix, true);
            } catch (IOException e) {
                Logger.getLogger("at.tuwien.ifs.somtoolbox")
                        .severe("Could not open or write to output file " + outputPrefix + ": " + e.getMessage());
                System.exit(-1);
            }
        } else {
            Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Skipping writing data winner mapping file");
        }
        // just copy along the class information file, so we have a copy with the same name-prefix, eases SOMViewer
        // starting..
        if (classInformationFileName != null) {
            String classInfoDestination = outputPrefix + ".cls"
                    + (classInformationFileName.endsWith(".gz") ? ".gz" : "");
            FileUtils.copyFile(classInformationFileName, classInfoDestination);
        }
    }

    /**
     * @see GrowingLayer#mapCompleteDataAfterTraining
     */
    // FIXME: this is just a copy of GrowingLayer#mapCompleteDataAfterTraining, would be good to have some code
    // re-used..
    // FIXME: this would also profit from multi-threading...
    private void mapCompleteDataAfterTraining(GrowingSOM som, InputData data, SOMLibClassInformation classInfo,
            ArrayList<String> mappingExceptions, String labelerName, int numLabels) {
        Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Start mapping data.");
        InputDatum datum = null;
        Unit winner = null;
        int numVectors = data.numVectors();

        int skippedInstances = 0;
        for (int i = 0; i < data.numVectors(); i++) {
            try {
                InputDatum currentInput = data.getInputDatum(i);
                String inpLabel = currentInput.getLabel();
                if (classInfo != null && mappingExceptions.contains(classInfo.getClassName(inpLabel))) {
                    skippedInstances++;
                }
            } catch (SOMLibFileFormatException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
        if (mappingExceptions.size() > 0) {
            Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Skipping classes: " + mappingExceptions
                    + ", containing a total of " + skippedInstances + " inputs.");
        }

        StdErrProgressWriter progressWriter = new StdErrProgressWriter(numVectors - skippedInstances,
                "Mapping datum ", 50);
        L2Metric metric = new L2Metric();
        for (int i = 0; i < numVectors; i++) {
            datum = data.getInputDatum(i);

            String inpLabel = datum.getLabel();
            try {
                if (classInfo != null && mappingExceptions.contains(classInfo.getClassName(inpLabel))) {
                    continue; // Skips this mapping step
                } else {
                    winner = som.getLayer().getWinner(datum, metric);
                    winner.addMappedInput(datum, false); // TODO: think about recursion
                    progressWriter.progress();
                }
            } catch (SOMLibFileFormatException e) {
                // TODO Auto-generated catch block
                Logger.getLogger("at.tuwien.ifs.somtoolbox").info("This should never happen");
                e.printStackTrace();
            }
        }
        Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Finished mapping data.");
        som.getLayer().calculateQuantizationErrorForUnits();
        som.getLayer().clearLabels();

        Labeler labeler = null;

        if (labelerName != null) { // if labeling then label
            try {
                labeler = AbstractLabeler.instantiate(labelerName);
                Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Instantiated labeler " + labelerName);
            } catch (Exception e) {
                Logger.getLogger("at.tuwien.ifs.somtoolbox")
                        .severe("Could not instantiate labeler \"" + labelerName + "\".");
                System.exit(-1);
            }
        }

        if (labelerName != null) { // if labeling then label
            labeler.label(som, data, numLabels);
        }
    }

}