org.apache.hama.ml.perception.SmallMultiLayerPerceptron.java Source code

Introduction

Here is the source code for org.apache.hama.ml.perception.SmallMultiLayerPerceptron.java
Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hama.ml.perception;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.commons.lang.SerializationUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.BSPJob;
import org.apache.hama.commons.io.MatrixWritable;
import org.apache.hama.commons.io.VectorWritable;
import org.apache.hama.commons.math.DenseDoubleMatrix;
import org.apache.hama.commons.math.DenseDoubleVector;
import org.apache.hama.commons.math.DoubleFunction;
import org.apache.hama.commons.math.DoubleVector;
import org.apache.hama.commons.math.FunctionFactory;
import org.apache.hama.ml.util.FeatureTransformer;
import org.mortbay.log.Log;

/**
 * SmallMultiLayerPerceptronBSP is a kind of multilayer perceptron whose
 * parameters can be fit into the memory of a single machine. This kind of model
 * can be trained and used more efficiently than the BigMultiLayerPerceptronBSP,
 * whose parameters are distributedly stored in multiple machines.
 * 
 * In general, it is a multilayer perceptron that consists of one input
 * layer, multiple hidden layers and one output layer.
 * 
 * The number of neurons in the input layer should be consistent with the number
 * of features in the training instance. The number of neurons in the output
 * layer should be consistent with the number of labels in the training
 * instance.
 */
public final class SmallMultiLayerPerceptron extends MultiLayerPerceptron implements Writable {

    /* The in-memory weight matrix */
    private DenseDoubleMatrix[] weightMatrice;

    /* Previous weight updates, used for momentum */
    private DenseDoubleMatrix[] prevWeightUpdateMatrices;

    /**
     * Creates a new perceptron from the given hyper-parameters, then allocates
     * its weight matrices and its previous-weight-update matrices.
     *
     * @see MultiLayerPerceptron#MultiLayerPerceptron(double, double, double, String, String, int[])
     */
    public SmallMultiLayerPerceptron(double learningRate, double regularization, double momentum,
            String squashingFunctionName, String costFunctionName, int[] layerSizeArray) {
        super(learningRate, regularization, momentum, squashingFunctionName, costFunctionName, layerSizeArray);
        // weights first, then the matching momentum buffers
        this.initializeWeightMatrix();
        initializePrevWeightUpdateMatrix();
    }

    /**
     * Creates a perceptron from a previously stored model. When a model path
     * is given, the model is read from it and fresh previous-weight-update
     * matrices are allocated.
     *
     * @see MultiLayerPerceptron#MultiLayerPerceptron(String)
     */
    public SmallMultiLayerPerceptron(String modelPath) {
        super(modelPath);
        if (modelPath == null) {
            // nothing to load
            return;
        }
        try {
            readFromModel();
            initializePrevWeightUpdateMatrix();
        } catch (IOException ioe) {
            // NOTE(review): a failed load is only printed; the instance is then
            // left without loaded weights - confirm callers can cope with that.
            ioe.printStackTrace();
        }
    }

    /**
     * Allocates one weight matrix per pair of adjacent layers and fills it
     * with pseudo-random values drawn uniformly from [-0.5, 0.5).
     * <p>
     * Matrix {@code i} connects layer {@code i} to layer {@code i + 1}. It has
     * {@code layerSizeArray[i] + 1} rows (one extra row for the bias neuron of
     * layer {@code i}) and {@code layerSizeArray[i + 1]} columns.
     */
    private void initializeWeightMatrix() {
        this.weightMatrice = new DenseDoubleMatrix[this.numberOfLayers - 1];
        for (int i = 0; i < this.numberOfLayers - 1; ++i) {
            // the "+ 1" row holds the weights of the bias neuron
            this.weightMatrice[i] = new DenseDoubleMatrix(this.layerSizeArray[i] + 1, this.layerSizeArray[i + 1]);

            // the current element value is ignored; every element is replaced
            // by a fresh uniform sample shifted into [-0.5, 0.5)
            this.weightMatrice[i].applyToElements(new DoubleFunction() {

                private final Random rnd = new Random();

                @Override
                public double apply(double value) {
                    return rnd.nextDouble() - 0.5;
                }

                @Override
                public double applyDerivative(double value) {
                    throw new UnsupportedOperationException("Not supported");
                }

            });
        }
    }

    /**
     * Allocates the matrices that hold the previous weight updates (used for
     * momentum). There is one matrix per pair of adjacent layers, sized
     * {@code (layerSizeArray[i] + 1) x layerSizeArray[i + 1]}.
     */
    private void initializePrevWeightUpdateMatrix() {
        this.prevWeightUpdateMatrices = new DenseDoubleMatrix[this.numberOfLayers - 1];
        DenseDoubleMatrix[] buffers = this.prevWeightUpdateMatrices;
        for (int layer = 0; layer < buffers.length; ++layer) {
            // same shape as the weight matrix between layer and layer + 1
            buffers[layer] = new DenseDoubleMatrix(this.layerSizeArray[layer] + 1, this.layerSizeArray[layer + 1]);
        }
    }

    /**
     * {@inheritDoc}
     * The model meta-data is stored in memory.
     */
    @Override
    public DoubleVector outputWrapper(DoubleVector featureVector) {
        List<double[]> layerOutputs = this.outputInternal(featureVector);
        // the last cached array belongs to the output layer, i.e. the MLP output
        double[] finalLayerOutput = layerOutputs.get(layerOutputs.size() - 1);
        return new DenseDoubleVector(finalLayerOutput);
    }

    /**
     * Feeds a feature vector forward through the network and records the
     * output of every layer.
     *
     * @param featureVector The input features; its dimension must equal the
     *          size of the input layer.
     * @return One array per layer, in layer order. Element 0 is the input
     *         layer (bias at index 0 followed by the transformed features);
     *         the last element is the output of the output layer.
     * @throws IllegalStateException If the feature dimension does not match
     *           the size of the input layer.
     */
    private List<double[]> outputInternal(DoubleVector featureVector) {
        // one array per layer: input layer, hidden layers, output layer
        List<double[]> outputCache = new ArrayList<double[]>();

        // the input layer carries one additional bias slot at index 0
        double[] intermediateResults = new double[this.layerSizeArray[0] + 1];
        if (intermediateResults.length - 1 != featureVector.getDimension()) {
            // report the size that is actually compared against
            throw new IllegalStateException("Input feature dimension incorrect! The dimension of input layer is "
                    + this.layerSizeArray[0] + ", but the dimension of input feature is "
                    + featureVector.getDimension());
        }

        // fill with input features
        intermediateResults[0] = 1.0; // bias

        // transform the original features to another space
        // NOTE(review): the dimension check above runs before the transform;
        // presumably the transformer preserves the dimension - confirm.
        featureVector = this.featureTransformer.transform(featureVector);

        for (int i = 0; i < featureVector.getDimension(); ++i) {
            intermediateResults[i + 1] = featureVector.get(i);
        }
        outputCache.add(intermediateResults);

        // forward the intermediate results to next layer
        for (int fromLayer = 0; fromLayer < this.numberOfLayers - 1; ++fromLayer) {
            intermediateResults = forward(fromLayer, intermediateResults);
            outputCache.add(intermediateResults);
        }

        return outputCache;
    }

    /**
     * Computes the output of layer {@code fromLayer + 1} from the output of
     * layer {@code fromLayer}.
     *
     * @param fromLayer The index of the layer whose output is forwarded.
     * @param intermediateResult The output of layer {@code fromLayer}, with
     *          the bias at index 0.
     * @return The output of the target layer. Unless the target is the output
     *         layer, index 0 holds the bias value 1.0 and the neuron outputs
     *         start at index 1.
     */
    private double[] forward(int fromLayer, double[] intermediateResult) {
        final int toLayer = fromLayer + 1;
        // every layer except the output layer gets a leading bias slot
        final boolean hasBias = toLayer < this.layerSizeArray.length - 1;
        final int offset = hasBias ? 1 : 0;

        final double[] results = new double[this.layerSizeArray[toLayer] + offset];
        if (hasBias) {
            results[0] = 1.0; // the bias
        }

        for (int target = 0; target < this.layerSizeArray[toLayer]; ++target) {
            DenseDoubleMatrix weights = this.weightMatrice[fromLayer];
            // weighted sum over all source neurons, bias included
            double weightedSum = 0.0;
            for (int source = 0; source < this.layerSizeArray[fromLayer] + 1; ++source) {
                weightedSum += weights.get(source, target) * intermediateResult[source];
            }
            results[target + offset] = this.squashingFunction.apply(weightedSum);
        }

        return results;
    }

    /**
     * Computes the weight updates produced by a single training instance.
     * The weights of the model itself are not modified here.
     *
     * @param trainingInstance The concatenation of the feature vector and the
     *          class label vector. When {@code null}, the freshly allocated
     *          update matrices are returned as they are.
     * @return One update matrix per weight matrix.
     * @throws Exception
     */
    DenseDoubleMatrix[] trainByInstance(DoubleVector trainingInstance) throws Exception {
        final int layerCount = this.layerSizeArray.length;

        // one update matrix per weight matrix, same shape
        DenseDoubleMatrix[] updates = new DenseDoubleMatrix[layerCount - 1];
        for (int layer = 0; layer < updates.length; ++layer) {
            updates[layer] = new DenseDoubleMatrix(this.layerSizeArray[layer] + 1, this.layerSizeArray[layer + 1]);
        }

        if (trainingInstance == null) {
            return updates;
        }

        // split the instance into (transformed) features and labels
        final int featureCount = this.layerSizeArray[0];
        double[] rawInstance = trainingInstance.toArray();
        DoubleVector rawFeatures = trainingInstance.sliceUnsafe(0, featureCount - 1);
        double[] transformedFeatures = this.featureTransformer.transform(rawFeatures).toArray();
        double[] labels = Arrays.copyOfRange(rawInstance, featureCount, rawInstance.length);

        // NOTE(review): outputInternal applies the feature transformer again to
        // these already transformed features - confirm this is intended.
        List<double[]> outputCache = this.outputInternal(new DenseDoubleVector(transformedFeatures));

        // delta of the output layer
        double[] delta = new double[this.layerSizeArray[layerCount - 1]];
        double[] networkOutput = outputCache.get(outputCache.size() - 1);
        double[] lastHiddenOutput = outputCache.get(outputCache.size() - 2);

        DenseDoubleMatrix lastMomentum = this.prevWeightUpdateMatrices[this.prevWeightUpdateMatrices.length - 1];
        for (int out = 0; out < delta.length; ++out) {
            double error = this.costFunction.applyDerivative(labels[out], networkOutput[out]);

            // regularization term
            if (this.regularization != 0.0) {
                DenseDoubleMatrix lastWeights = this.weightMatrice[this.weightMatrice.length - 1];
                final int outputSize = this.layerSizeArray[layerCount - 1];
                // NOTE(review): the row index runs over the output layer size,
                // although the rows of this matrix belong to the last hidden
                // layer (plus bias) - verify the intended bound.
                double weightSum = 0.0;
                for (int k = 0; k < outputSize; ++k) {
                    weightSum += lastWeights.get(k, out);
                }
                error += this.regularization * (weightSum / outputSize);
            }

            error *= this.squashingFunction.applyDerivative(networkOutput[out]);
            delta[out] = error;

            // updates for the weights between the last hidden layer and the
            // output layer: gradient step plus momentum
            double scaledError = -this.learningRate * error;
            for (int in = 0; in < this.layerSizeArray[layerCount - 2] + 1; ++in) {
                double update = scaledError * lastHiddenOutput[in] + this.momentum * lastMomentum.get(in, out);
                updates[updates.length - 1].set(in, out, update);
            }
        }

        // walk back through the hidden layers
        for (int layer = layerCount - 2; layer >= 1; --layer) {
            delta = backpropagate(layer, delta, outputCache, updates);
        }

        return updates;
    }

    /**
     * Back-propagates the errors from the next layer to the current (hidden)
     * layer. The weight updates for the weights between the previous layer and
     * the current layer are stored in {@code weightUpdateMatrices}, and the
     * delta of the current layer is returned.
     * <p>
     * Momentum is not applied to these updates.
     *
     * @param curLayerIdx The layer index of the current layer.
     * @param nextLayerDelta The delta of the next layer (no bias entry).
     * @param outputCache The cache of the output of all the layers; the arrays
     *          of non-output layers carry the bias at index 0.
     * @param weightUpdateMatrices The weight update matrices.
     * @return The delta of the current layer (no bias entry), to be used for
     *         the next iteration of back-propagation.
     */
    private double[] backpropagate(int curLayerIdx, double[] nextLayerDelta, List<double[]> outputCache,
            DenseDoubleMatrix[] weightUpdateMatrices) {
        int prevLayerIdx = curLayerIdx - 1;
        double[] delta = new double[this.layerSizeArray[curLayerIdx]];
        double[] curLayerOutput = outputCache.get(curLayerIdx);
        double[] prevLayerOutput = outputCache.get(prevLayerIdx);

        // for each (non-bias) neuron j in the current layer, calculate the delta
        for (int j = 0; j < delta.length; ++j) {
            // Row 0 of the outgoing weight matrix and index 0 of the cached
            // output both belong to the bias neuron, so neuron j lives at
            // index j + 1 in both of them.
            int biasShifted = j + 1;

            // aggregate delta from next layer
            for (int k = 0; k < nextLayerDelta.length; ++k) {
                double weight = this.weightMatrice[curLayerIdx].get(biasShifted, k);
                delta[j] += weight * nextLayerDelta[k];
            }
            delta[j] *= this.squashingFunction.applyDerivative(curLayerOutput[biasShifted]);

            // calculate the weight update matrix between the previous layer and
            // the current layer; rows include the bias of the previous layer
            for (int i = 0; i < weightUpdateMatrices[prevLayerIdx].getRowCount(); ++i) {
                double updatedValue = -this.learningRate * delta[j] * prevLayerOutput[i];
                weightUpdateMatrices[prevLayerIdx].set(i, j, updatedValue);
            }
        }

        return delta;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Configures and runs a BSP job with {@code SmallMLPTrainer}, then reloads
     * the model from the location stored under the {@code "modelPath"} key of
     * {@code trainingParams}.
     *
     * @throws IOException If the training job does not complete successfully
     *           or the model cannot be reloaded.
     */
    @Override
    public void train(Path dataInputPath, Map<String, String> trainingParams)
            throws IOException, InterruptedException, ClassNotFoundException {
        // create the BSP training job
        Configuration conf = new Configuration();
        for (Map.Entry<String, String> entry : trainingParams.entrySet()) {
            conf.set(entry.getKey(), entry.getValue());
        }

        // put model related parameters; only needed when the model is built
        // from scratch instead of being loaded from an existing model path
        if (modelPath == null || modelPath.trim().length() == 0) {
            conf.set("MLPType", this.MLPType);
            conf.set("learningRate", "" + this.learningRate);
            conf.set("regularization", "" + this.regularization);
            conf.set("momentum", "" + this.momentum);
            conf.set("squashingFunctionName", this.squashingFunctionName);
            conf.set("costFunctionName", this.costFunctionName);
            StringBuilder layerSizeArraySb = new StringBuilder();
            for (int layerSize : this.layerSizeArray) {
                layerSizeArraySb.append(layerSize);
                layerSizeArraySb.append(' ');
            }
            conf.set("layerSizeArray", layerSizeArraySb.toString());
        }

        HamaConfiguration hamaConf = new HamaConfiguration(conf);

        BSPJob job = new BSPJob(hamaConf, SmallMLPTrainer.class);
        job.setJobName("Small scale MLP training");
        job.setJarByClass(SmallMLPTrainer.class);
        job.setBspClass(SmallMLPTrainer.class);
        job.setInputPath(dataInputPath);
        job.setInputFormat(org.apache.hama.bsp.SequenceFileInputFormat.class);
        job.setInputKeyClass(LongWritable.class);
        job.setInputValueClass(VectorWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormat(org.apache.hama.bsp.NullOutputFormat.class);

        int numTasks = conf.getInt("tasks", 1);
        job.setNumBspTask(numTasks);

        // do not reload a missing or stale model after a failed job
        if (!job.waitForCompletion(true)) {
            throw new IOException("Small scale MLP training job did not complete successfully.");
        }

        // reload learned model
        Log.info(String.format("Reload model from %s.", trainingParams.get("modelPath")));
        this.modelPath = trainingParams.get("modelPath");
        this.readFromModel();
    }

    /**
     * Restores the model from the given input, reading the fields in the order
     * in which {@link #write(DataOutput)} emits them: model type, learning
     * rate, regularization, momentum, number of layers, squashing and cost
     * function names, layer sizes, weight matrices and finally the serialized
     * feature transformer class.
     *
     * @param input The input to read the model from.
     * @throws IOException If reading fails or the feature transformer cannot
     *           be instantiated.
     */
    @Override
    public void readFields(DataInput input) throws IOException {
        this.MLPType = WritableUtils.readString(input);
        this.learningRate = input.readDouble();
        this.regularization = input.readDouble();
        this.momentum = input.readDouble();
        this.numberOfLayers = input.readInt();
        this.squashingFunctionName = WritableUtils.readString(input);
        this.costFunctionName = WritableUtils.readString(input);

        this.squashingFunction = FunctionFactory.createDoubleFunction(this.squashingFunctionName);
        this.costFunction = FunctionFactory.createDoubleDoubleFunction(this.costFunctionName);

        // read the number of neurons for each layer
        this.layerSizeArray = new int[this.numberOfLayers];
        for (int i = 0; i < numberOfLayers; ++i) {
            this.layerSizeArray[i] = input.readInt();
        }
        this.weightMatrice = new DenseDoubleMatrix[this.numberOfLayers - 1];
        for (int i = 0; i < numberOfLayers - 1; ++i) {
            this.weightMatrice[i] = (DenseDoubleMatrix) MatrixWritable.read(input);
        }

        // read feature transformer: a length-prefixed, serialized Class object
        int bytesLen = input.readInt();
        byte[] featureTransformerBytes = new byte[bytesLen];
        input.readFully(featureTransformerBytes);

        // NOTE(review): this is native Java deserialization of bytes taken from
        // the model file; only load models from trusted locations.
        Object serializedClass = SerializationUtils.deserialize(featureTransformerBytes);
        this.featureTransformer = instantiateFeatureTransformer(serializedClass);
    }

    /**
     * Creates a feature transformer through the public no-argument
     * constructor of the deserialized class.
     *
     * @param serializedClass The object deserialized from the model; expected
     *          to be the {@link Class} of a {@link FeatureTransformer}.
     * @return A new feature transformer instance.
     * @throws IOException If the object is not a suitable class or the
     *           instance cannot be created; the cause is preserved.
     */
    private static FeatureTransformer instantiateFeatureTransformer(Object serializedClass) throws IOException {
        if (!(serializedClass instanceof Class)) {
            throw new IOException("Model does not contain a feature transformer class.");
        }
        Class<?> transformerClass = (Class<?>) serializedClass;
        String failure = String.format("Cannot instantiate feature transformer '%s'.", transformerClass.getName());
        try {
            return transformerClass.asSubclass(FeatureTransformer.class).getConstructor().newInstance();
        } catch (ClassCastException e) {
            throw new IOException(failure, e);
        } catch (NoSuchMethodException e) {
            throw new IOException(failure, e);
        } catch (InstantiationException e) {
            throw new IOException(failure, e);
        } catch (IllegalAccessException e) {
            throw new IOException(failure, e);
        } catch (InvocationTargetException e) {
            throw new IOException(failure, e);
        }
    }

    /**
     * Serializes the model: type, hyper-parameters, layer count, function
     * names, layer sizes, weight matrices and the serialized class of the
     * feature transformer (length-prefixed).
     *
     * @param output The output to write the model to.
     * @throws IOException If writing fails.
     */
    @Override
    public void write(DataOutput output) throws IOException {
        // model type and scalar hyper-parameters
        WritableUtils.writeString(output, this.MLPType);
        output.writeDouble(this.learningRate);
        output.writeDouble(this.regularization);
        output.writeDouble(this.momentum);
        output.writeInt(this.numberOfLayers);
        WritableUtils.writeString(output, this.squashingFunctionName);
        WritableUtils.writeString(output, this.costFunctionName);

        // number of neurons of every layer
        for (int layer = 0; layer < this.numberOfLayers; ++layer) {
            output.writeInt(this.layerSizeArray[layer]);
        }

        // one weight matrix per pair of adjacent layers
        for (int layer = 0; layer < this.numberOfLayers - 1; ++layer) {
            new MatrixWritable(this.weightMatrice[layer]).write(output);
        }

        // only the class of the feature transformer is stored, not its state
        Class<? extends FeatureTransformer> transformerType = this.featureTransformer.getClass();
        byte[] serializedType = SerializationUtils.serialize(transformerType);
        output.writeInt(serializedType.length);
        output.write(serializedType);
    }

    /**
     * Reads the model from the location given by {@code modelPath} and checks
     * that the stored model type matches this class.
     *
     * @throws IOException If the model path is not a valid URI or the model
     *           cannot be read.
     * @throws IllegalStateException If the stored model type is not the name
     *           of this class.
     */
    @Override
    protected void readFromModel() throws IOException {
        Configuration conf = new Configuration();
        URI uri;
        try {
            uri = new URI(modelPath);
        } catch (URISyntaxException e) {
            // surface the problem instead of continuing without a model
            throw new IOException(String.format("Invalid model path '%s'.", modelPath), e);
        }

        FileSystem fs = FileSystem.get(uri, conf);
        // fs.open already returns an FSDataInputStream; no extra wrapping needed
        FSDataInputStream is = fs.open(new Path(modelPath));
        try {
            this.readFields(is);
        } finally {
            // always release the stream, even when reading fails
            is.close();
        }

        if (!this.MLPType.equals(this.getClass().getName())) {
            throw new IllegalStateException(
                    String.format("Model type incorrect, cannot load model '%s' for '%s'.", this.MLPType,
                            this.getClass().getName()));
        }
    }

    /**
     * Writes the model to the given location, overwriting an existing file.
     * The file system is resolved from the path itself, so a path that names
     * its own scheme is written to that file system and a path without a
     * scheme goes to the configured default file system.
     *
     * @param modelPath The location to write the model to.
     * @throws IOException If the file cannot be created or written.
     */
    @Override
    public void writeModelToFile(String modelPath) throws IOException {
        Configuration conf = new Configuration();
        Path target = new Path(modelPath);
        FileSystem fs = target.getFileSystem(conf);
        FSDataOutputStream stream = fs.create(target, true);
        try {
            this.write(stream);
        } finally {
            // always release the stream, even when writing fails
            stream.close();
        }
    }

    /** Returns the weight matrices held by this model (the array itself, not a copy). */
    DenseDoubleMatrix[] getWeightMatrices() {
        return weightMatrice;
    }

    /** Returns the previous weight update matrices (the array itself, not a copy). */
    DenseDoubleMatrix[] getPrevWeightUpdateMatrices() {
        return prevWeightUpdateMatrices;
    }

    /** Replaces the weight matrices of this model with the given array. */
    void setWeightMatrices(DenseDoubleMatrix[] newMatrices) {
        weightMatrice = newMatrices;
    }

    /** Replaces the previous weight update matrices with the given array. */
    void setPrevWeightUpdateMatrices(DenseDoubleMatrix[] newPrevWeightUpdateMatrices) {
        prevWeightUpdateMatrices = newPrevWeightUpdateMatrices;
    }

    /**
     * Adds the given updates to the weight matrices, matrix by matrix.
     *
     * @param updateMatrices The weight updates, one matrix per weight matrix.
     */
    void updateWeightMatrices(DenseDoubleMatrix[] updateMatrices) {
        final int matrixCount = this.weightMatrice.length;
        for (int layer = 0; layer < matrixCount; ++layer) {
            DenseDoubleMatrix current = this.weightMatrice[layer];
            // store the sum returned by add(...) as the new weight matrix
            this.weightMatrice[layer] = (DenseDoubleMatrix) current.add(updateMatrices[layer]);
        }
    }

    /**
     * Renders the given matrices as text: a "Matrix [i]" header per matrix,
     * then one line per row, then a blank line.
     *
     * @param mat The matrices to render.
     * @return The textual representation of the weights.
     */
    static String weightsToString(DenseDoubleMatrix[] mat) {
        StringBuilder text = new StringBuilder();
        int matrixIndex = 0;
        for (DenseDoubleMatrix matrix : mat) {
            text.append(String.format("Matrix [%d]\n", matrixIndex));
            for (double[] row : matrix.getValues()) {
                text.append(Arrays.toString(row)).append('\n');
            }
            text.append('\n');
            ++matrixIndex;
        }
        return text.toString();
    }

    /** Returns the fully qualified class name of this instance as the model type name. */
    @Override
    protected String getTypeName() {
        Class<?> runtimeType = getClass();
        return runtimeType.getName();
    }

}