Java tutorial: SmallLayeredNeuralNetwork (Apache Hama ML)
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hama.ml.ann;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.math.RandomUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.BSPJob;
import org.apache.hama.commons.io.MatrixWritable;
import org.apache.hama.commons.io.VectorWritable;
import org.apache.hama.commons.math.DenseDoubleMatrix;
import org.apache.hama.commons.math.DenseDoubleVector;
import org.apache.hama.commons.math.DoubleFunction;
import org.apache.hama.commons.math.DoubleMatrix;
import org.apache.hama.commons.math.DoubleVector;
import org.apache.hama.commons.math.FunctionFactory;
import org.mortbay.log.Log;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/**
 * SmallLayeredNeuralNetwork defines the general operations for derivative
 * layered models, including Linear Regression, Logistic Regression, Multilayer
 * Perceptron, Autoencoder, Restricted Boltzmann Machine, etc. For
 * SmallLayeredNeuralNetwork, the training can be conducted in parallel, but
 * the parameters of the model are assumed to be stored on a single machine.
 * 
 * In general, these models consist of neurons which are aligned in layers.
 * For any two adjacent layers, the neurons are connected to form a bipartite
 * weighted graph.
 * 
 */
public class SmallLayeredNeuralNetwork extends AbstractLayeredNeuralNetwork {

  /* Weights between neurons at adjacent layers */
  protected List<DoubleMatrix> weightMatrixList;

  /* Previous weight updates between neurons at adjacent layers */
  protected List<DoubleMatrix> prevWeightUpdatesList;

  /* Different layers can have different squashing functions */
  protected List<DoubleFunction> squashingFunctionList;

  protected int finalLayerIdx;

  public SmallLayeredNeuralNetwork() {
    this.layerSizeList = Lists.newArrayList();
    this.weightMatrixList = Lists.newArrayList();
    this.prevWeightUpdatesList = Lists.newArrayList();
    this.squashingFunctionList = Lists.newArrayList();
  }

  public SmallLayeredNeuralNetwork(String modelPath) {
    super(modelPath);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public int addLayer(int size, boolean isFinalLayer,
      DoubleFunction squashingFunction) {
    Preconditions.checkArgument(size > 0,
        "Size of layer must be larger than 0.");
    if (!isFinalLayer) {
      // add one more neuron for the bias
      size += 1;
    }

    this.layerSizeList.add(size);
    int layerIdx = this.layerSizeList.size() - 1;
    if (isFinalLayer) {
      this.finalLayerIdx = layerIdx;
    }

    // add weights between the current layer and the previous layer; the input
    // layer has no squashing function
    if (layerIdx > 0) {
      int sizePrevLayer = this.layerSizeList.get(layerIdx - 1);
      // row count equals the size of the current layer (excluding its bias
      // neuron), column count equals the size of the previous layer
      int row = isFinalLayer ? size : size - 1;
      int col = sizePrevLayer;
      DoubleMatrix weightMatrix = new DenseDoubleMatrix(row, col);
      // initialize weights uniformly at random in [-0.5, 0.5)
      weightMatrix.applyToElements(new DoubleFunction() {
        @Override
        public double apply(double value) {
          return RandomUtils.nextDouble() - 0.5;
        }

        @Override
        public double applyDerivative(double value) {
          throw new UnsupportedOperationException("");
        }
      });
      this.weightMatrixList.add(weightMatrix);
      this.prevWeightUpdatesList.add(new DenseDoubleMatrix(row, col));
      this.squashingFunctionList.add(squashingFunction);
    }
    return layerIdx;
  }

  /**
   * Update the weight matrices with the given matrices.
   * 
   * @param matrices The update matrices, one per pair of adjacent layers.
   */
  public void updateWeightMatrices(DoubleMatrix[] matrices) {
    for (int i = 0; i < matrices.length; ++i) {
      DoubleMatrix matrix = this.weightMatrixList.get(i);
      this.weightMatrixList.set(i, matrix.add(matrices[i]));
    }
  }

  /**
   * Set the previous weight update matrices.
   * 
   * @param prevUpdates
   */
  void setPrevWeightMatrices(DoubleMatrix[] prevUpdates) {
    this.prevWeightUpdatesList.clear();
    Collections.addAll(this.prevWeightUpdatesList, prevUpdates);
  }

  /**
   * Add a batch of matrices onto the given destination matrices.
   * 
   * @param destMatrices
   * @param sourceMatrices
   */
  static void matricesAdd(DoubleMatrix[] destMatrices,
      DoubleMatrix[] sourceMatrices) {
    for (int i = 0; i < destMatrices.length; ++i) {
      destMatrices[i] = destMatrices[i].add(sourceMatrices[i]);
    }
  }

  /**
   * Get all the weight matrices.
   * 
   * @return The matrices in form of matrix array.
   */
  DoubleMatrix[] getWeightMatrices() {
    DoubleMatrix[] matrices = new DoubleMatrix[this.weightMatrixList.size()];
    this.weightMatrixList.toArray(matrices);
    return matrices;
  }

  /**
   * Set the weight matrices.
   * 
   * @param matrices
   */
  public void setWeightMatrices(DoubleMatrix[] matrices) {
    this.weightMatrixList = new ArrayList<DoubleMatrix>();
    Collections.addAll(this.weightMatrixList, matrices);
  }
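  /*
   * Worked example of the layer bookkeeping above (illustration only, not in
   * the original source): building a 2-5-1 perceptron via
   *
   *   addLayer(2, false, f); addLayer(5, false, f); addLayer(1, true, f);
   *
   * stores layerSizeList = [3, 6, 1] (a bias neuron is added to every
   * non-final layer), a 5x3 weight matrix between layers 0 and 1 (the bias
   * neuron of layer 1 has no incoming weights), and a 1x6 weight matrix
   * between layers 1 and 2.
   */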
  /**
   * Get the previous weight update matrices as an array.
   * 
   * @return The matrices in form of matrix array.
   */
  public DoubleMatrix[] getPrevMatricesUpdates() {
    DoubleMatrix[] prevMatricesUpdates = new DoubleMatrix[this.prevWeightUpdatesList
        .size()];
    for (int i = 0; i < this.prevWeightUpdatesList.size(); ++i) {
      prevMatricesUpdates[i] = this.prevWeightUpdatesList.get(i);
    }
    return prevMatricesUpdates;
  }

  public void setWeightMatrix(int index, DoubleMatrix matrix) {
    Preconditions.checkArgument(
        0 <= index && index < this.weightMatrixList.size(),
        String.format("index [%d] should be in range [%d, %d).", index, 0,
            this.weightMatrixList.size()));
    this.weightMatrixList.set(index, matrix);
  }

  @Override
  public void readFields(DataInput input) throws IOException {
    super.readFields(input);

    // read squashing functions
    int squashingFunctionSize = input.readInt();
    this.squashingFunctionList = Lists.newArrayList();
    for (int i = 0; i < squashingFunctionSize; ++i) {
      this.squashingFunctionList.add(FunctionFactory
          .createDoubleFunction(WritableUtils.readString(input)));
    }

    // read weights and construct matrices of previous updates
    int numOfMatrices = input.readInt();
    this.weightMatrixList = Lists.newArrayList();
    this.prevWeightUpdatesList = Lists.newArrayList();
    for (int i = 0; i < numOfMatrices; ++i) {
      DoubleMatrix matrix = MatrixWritable.read(input);
      this.weightMatrixList.add(matrix);
      this.prevWeightUpdatesList.add(new DenseDoubleMatrix(
          matrix.getRowCount(), matrix.getColumnCount()));
    }
  }

  @Override
  public void write(DataOutput output) throws IOException {
    super.write(output);

    // write squashing functions
    output.writeInt(this.squashingFunctionList.size());
    for (DoubleFunction aSquashingFunctionList : this.squashingFunctionList) {
      WritableUtils.writeString(output,
          aSquashingFunctionList.getFunctionName());
    }

    // write weight matrices
    output.writeInt(this.weightMatrixList.size());
    for (DoubleMatrix aWeightMatrixList : this.weightMatrixList) {
      MatrixWritable.write(aWeightMatrixList, output);
    }

    // DO NOT WRITE WEIGHT UPDATE
  }

  @Override
  public DoubleMatrix getWeightsByLayer(int layerIdx) {
    return this.weightMatrixList.get(layerIdx);
  }

  /**
   * Get the output of the model according to the given feature instance.
   */
  @Override
  public DoubleVector getOutput(DoubleVector instance) {
    Preconditions.checkArgument(
        this.layerSizeList.get(0) - 1 == instance.getDimension(),
        String.format("The dimension of input instance should be %d.",
            this.layerSizeList.get(0) - 1));
    // transform the features to another space
    DoubleVector transformedInstance = this.featureTransformer
        .transform(instance);
    // add bias feature
    DoubleVector instanceWithBias = new DenseDoubleVector(
        transformedInstance.getDimension() + 1);
    // set bias to be a little bit less than 1.0
    instanceWithBias.set(0, 0.99999);
    for (int i = 1; i < instanceWithBias.getDimension(); ++i) {
      instanceWithBias.set(i, transformedInstance.get(i - 1));
    }

    List<DoubleVector> outputCache = getOutputInternal(instanceWithBias);
    // return the output of the last layer
    DoubleVector result = outputCache.get(outputCache.size() - 1);
    // remove the bias
    return result.sliceUnsafe(1, result.getDimension() - 1);
  }
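  /*
   * The forward pass below computes, per pair of adjacent layers (sketch,
   * not in the original source):
   *
   *   o_{l+1} = [1, f_l(W_l * o_l)]
   *
   * where o_l is the cached output of layer l (including its leading bias
   * entry), W_l is the weight matrix between layers l and l+1, and f_l is the
   * squashing function of that transition; the constant 1 is prepended as the
   * bias input for the next layer.
   */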
  /**
   * Calculate the output internally; the intermediate output of each layer
   * will be stored.
   * 
   * @param instanceWithBias The instance containing the features.
   * @return Cached output of each layer.
   */
  public List<DoubleVector> getOutputInternal(DoubleVector instanceWithBias) {
    List<DoubleVector> outputCache = new ArrayList<DoubleVector>();
    // fill with instance
    DoubleVector intermediateOutput = instanceWithBias;
    outputCache.add(intermediateOutput);

    for (int i = 0; i < this.layerSizeList.size() - 1; ++i) {
      intermediateOutput = forward(i, intermediateOutput);
      outputCache.add(intermediateOutput);
    }
    return outputCache;
  }

  /**
   * Forward the calculation for one layer.
   * 
   * @param fromLayer The index of the previous layer.
   * @param intermediateOutput The intermediate output of the previous layer.
   * @return A new vector with the result of the operation.
   */
  protected DoubleVector forward(int fromLayer, DoubleVector intermediateOutput) {
    DoubleMatrix weightMatrix = this.weightMatrixList.get(fromLayer);

    DoubleVector vec = weightMatrix.multiplyVectorUnsafe(intermediateOutput);
    vec = vec.applyToElements(this.squashingFunctionList.get(fromLayer));

    // add bias
    DoubleVector vecWithBias = new DenseDoubleVector(vec.getDimension() + 1);
    vecWithBias.set(0, 1);
    for (int i = 0; i < vec.getDimension(); ++i) {
      vecWithBias.set(i + 1, vec.get(i));
    }
    return vecWithBias;
  }

  /**
   * Train the model online, i.e. immediately apply the weight updates
   * obtained from one training instance.
   * 
   * @param trainingInstance
   */
  public void trainOnline(DoubleVector trainingInstance) {
    DoubleMatrix[] updateMatrices = this.trainByInstance(trainingInstance);
    this.updateWeightMatrices(updateMatrices);
  }
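  /*
   * Expected training-instance layout for trainByInstance below
   * (illustration only, not in the original source): in SUPERVISED mode the
   * vector is the concatenation [features | labels], e.g. a 2-feature,
   * 1-label instance is {x1, x2, y}; in UNSUPERVISED mode it contains the
   * features only, and the (transformed) features double as the
   * reconstruction target.
   */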
  @Override
  public DoubleMatrix[] trainByInstance(DoubleVector trainingInstance) {
    DoubleVector transformedVector = this.featureTransformer
        .transform(trainingInstance.sliceUnsafe(this.layerSizeList.get(0) - 1));

    int inputDimension = this.layerSizeList.get(0) - 1;
    int outputDimension;
    DoubleVector inputInstance = null;
    DoubleVector labels = null;
    if (this.learningStyle == LearningStyle.SUPERVISED) {
      outputDimension = this.layerSizeList.get(this.layerSizeList.size() - 1);
      // validate training instance
      Preconditions.checkArgument(
          inputDimension + outputDimension == trainingInstance.getDimension(),
          String.format(
              "The dimension of training instance is %d, but requires %d.",
              trainingInstance.getDimension(), inputDimension
                  + outputDimension));

      inputInstance = new DenseDoubleVector(this.layerSizeList.get(0));
      inputInstance.set(0, 1); // add bias
      // get the features from the transformed vector
      for (int i = 0; i < inputDimension; ++i) {
        inputInstance.set(i + 1, transformedVector.get(i));
      }
      // get the labels from the original training instance
      labels = trainingInstance.sliceUnsafe(inputInstance.getDimension() - 1,
          trainingInstance.getDimension() - 1);
    } else if (this.learningStyle == LearningStyle.UNSUPERVISED) {
      // labels are identical to input features
      outputDimension = inputDimension;
      // validate training instance
      Preconditions.checkArgument(
          inputDimension == trainingInstance.getDimension(),
          String.format(
              "The dimension of training instance is %d, but requires %d.",
              trainingInstance.getDimension(), inputDimension));

      inputInstance = new DenseDoubleVector(this.layerSizeList.get(0));
      inputInstance.set(0, 1); // add bias
      // get the features from the transformed vector
      for (int i = 0; i < inputDimension; ++i) {
        inputInstance.set(i + 1, transformedVector.get(i));
      }
      // get the labels by copying the transformed vector
      labels = transformedVector.deepCopy();
    }

    List<DoubleVector> internalResults = this.getOutputInternal(inputInstance);
    DoubleVector output = internalResults.get(internalResults.size() - 1);

    // get the training error
    calculateTrainingError(labels,
        output.deepCopy().sliceUnsafe(1, output.getDimension() - 1));

    if (this.trainingMethod.equals(TrainingMethod.GRADIENT_DESCENT)) {
      return this.trainByInstanceGradientDescent(labels, internalResults);
    } else {
      throw new IllegalArgumentException("Training method is not supported.");
    }
  }

  /**
   * Train by gradient descent. Get the updated weights using one training
   * instance.
   * 
   * @param labels The labels of the training instance.
   * @param internalResults The cached output of each layer.
   * @return The weight update matrices.
   */
  private DoubleMatrix[] trainByInstanceGradientDescent(DoubleVector labels,
      List<DoubleVector> internalResults) {

    DoubleVector output = internalResults.get(internalResults.size() - 1);
    // initialize weight update matrices
    DenseDoubleMatrix[] weightUpdateMatrices = new DenseDoubleMatrix[this.weightMatrixList
        .size()];
    for (int m = 0; m < weightUpdateMatrices.length; ++m) {
      weightUpdateMatrices[m] = new DenseDoubleMatrix(this.weightMatrixList
          .get(m).getRowCount(), this.weightMatrixList.get(m).getColumnCount());
    }
    DoubleVector deltaVec = new DenseDoubleVector(
        this.layerSizeList.get(this.layerSizeList.size() - 1));

    DoubleFunction squashingFunction = this.squashingFunctionList
        .get(this.squashingFunctionList.size() - 1);

    DoubleMatrix lastWeightMatrix = this.weightMatrixList
        .get(this.weightMatrixList.size() - 1);
    for (int i = 0; i < deltaVec.getDimension(); ++i) {
      double costFuncDerivative = this.costFunction.applyDerivative(
          labels.get(i), output.get(i + 1));
      // add regularization
      costFuncDerivative += this.regularizationWeight
          * lastWeightMatrix.getRowVector(i).sum();
      deltaVec.set(i, costFuncDerivative
          * squashingFunction.applyDerivative(output.get(i + 1)));
    }

    // start from the layer before the output layer
    for (int layer = this.layerSizeList.size() - 2; layer >= 0; --layer) {
      output = internalResults.get(layer);
      deltaVec = backpropagate(layer, deltaVec, internalResults,
          weightUpdateMatrices[layer]);
    }

    this.setPrevWeightMatrices(weightUpdateMatrices);

    return weightUpdateMatrices;
  }
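  /*
   * Sketch of the recurrence implemented by backpropagate below
   * (illustration only, not in the original source): with delta_{l+1} the
   * error of the next layer, o_l the cached output of layer l, and f_l its
   * squashing function,
   *
   *   delta_l     = (W_l^T * delta_{l+1}) .* f_l'(o_l)
   *   update(W_l) = -learningRate * delta_{l+1} * o_l^T
   *                 + momentumWeight * prevUpdate(W_l)
   *
   * where .* denotes element-wise multiplication.
   */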
  /**
   * Back-propagate the errors from the next layer to the current layer. The
   * weight update information will be stored in weightUpdateMatrix, and the
   * delta of the current layer will be returned.
   * 
   * @param curLayerIdx Index of the current layer.
   * @param nextLayerDelta Delta of the next layer.
   * @param outputCache Cached output of each layer.
   * @param weightUpdateMatrix The matrix to store the weight updates in.
   * @return The delta of the current layer.
   */
  private DoubleVector backpropagate(int curLayerIdx,
      DoubleVector nextLayerDelta, List<DoubleVector> outputCache,
      DenseDoubleMatrix weightUpdateMatrix) {

    // get layer related information
    DoubleFunction squashingFunction = this.squashingFunctionList
        .get(curLayerIdx);
    DoubleVector curLayerOutput = outputCache.get(curLayerIdx);
    DoubleMatrix weightMatrix = this.weightMatrixList.get(curLayerIdx);
    DoubleMatrix prevWeightMatrix = this.prevWeightUpdatesList.get(curLayerIdx);

    // if the next layer is not the output layer, remove the delta of the bias
    // neuron
    if (curLayerIdx != this.layerSizeList.size() - 2) {
      nextLayerDelta = nextLayerDelta.slice(1,
          nextLayerDelta.getDimension() - 1);
    }

    DoubleVector delta = weightMatrix.transpose()
        .multiplyVector(nextLayerDelta);
    for (int i = 0; i < delta.getDimension(); ++i) {
      delta.set(i,
          delta.get(i)
              * squashingFunction.applyDerivative(curLayerOutput.get(i)));
    }

    // update weights
    for (int i = 0; i < weightUpdateMatrix.getRowCount(); ++i) {
      for (int j = 0; j < weightUpdateMatrix.getColumnCount(); ++j) {
        weightUpdateMatrix.set(i, j,
            -learningRate * nextLayerDelta.get(i) * curLayerOutput.get(j)
                + this.momentumWeight * prevWeightMatrix.get(i, j));
      }
    }

    return delta;
  }

  @Override
  protected void trainInternal(Path dataInputPath,
      Map<String, String> trainingParams) throws IOException,
      InterruptedException, ClassNotFoundException {
    // add all training parameters to the configuration
    Configuration conf = new Configuration();
    for (Map.Entry<String, String> entry : trainingParams.entrySet()) {
      conf.set(entry.getKey(), entry.getValue());
    }

    // if the training parameters contain the model path, update the model path
    String modelPath = trainingParams.get("modelPath");
    if (modelPath != null) {
      this.modelPath = modelPath;
    }
    // modelPath must be set before training
    if (this.modelPath == null) {
      throw new IllegalArgumentException(
          "Please specify the modelPath for model, "
              + "either through setModelPath() or add 'modelPath' to the training parameters.");
    }
    conf.set("modelPath", this.modelPath);
    this.writeModelToFile();

    HamaConfiguration hamaConf = new HamaConfiguration(conf);

    // create job
    BSPJob job = new BSPJob(hamaConf, SmallLayeredNeuralNetworkTrainer.class);
    job.setJobName("Small scale Neural Network training");
    job.setJarByClass(SmallLayeredNeuralNetworkTrainer.class);
    job.setBspClass(SmallLayeredNeuralNetworkTrainer.class);
    job.setInputPath(dataInputPath);
    job.setInputFormat(org.apache.hama.bsp.SequenceFileInputFormat.class);
    job.setInputKeyClass(LongWritable.class);
    job.setInputValueClass(VectorWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormat(org.apache.hama.bsp.NullOutputFormat.class);

    int numTasks = conf.getInt("tasks", 1);
    Log.info(String.format("Number of tasks: %d\n", numTasks));
    job.setNumBspTask(numTasks);
    job.waitForCompletion(true);

    // reload the learned model
    Log.info(String.format("Reload model from %s.", this.modelPath));
    this.readFromModel();
  }

  @Override
  protected void calculateTrainingError(DoubleVector labels,
      DoubleVector output) {
    DoubleVector errors = labels.deepCopy().applyToElements(output,
        this.costFunction);
    this.trainingError = errors.sum();
  }

  /**
   * Get the squashing function of a specified layer.
   * 
   * @param idx
   * @return The squashing function of the specified layer.
   */
  public DoubleFunction getSquashingFunction(int idx) {
    return this.squashingFunctionList.get(idx);
  }

}
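To tie the pieces together, here is a minimal usage sketch (not part of the Apache Hama source): it builds a small XOR perceptron with addLayer, trains it online with trainOnline, and queries it with getOutput. The FunctionFactory names ("Sigmoid", "SquaredError") and the setLearningRate/setCostFunction setters are assumed to be inherited from AbstractLayeredNeuralNetwork and its parents, and the defaults are assumed to be supervised gradient descent with an identity feature transformer; adjust the calls to the actual Hama ML API if they differ.

class SmallLayeredNeuralNetworkExample {
  public static void main(String[] args) {
    // a 2-3-1 perceptron for XOR; the squashing function passed for the
    // input layer is ignored (the input layer has none)
    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
    ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
    ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
    // assumed inherited setters; see the note above
    ann.setLearningRate(0.5);
    ann.setCostFunction(FunctionFactory
        .createDoubleDoubleFunction("SquaredError"));

    // supervised instances laid out as [feature1, feature2, label]
    double[][] instances = { { 0, 0, 0 }, { 0, 1, 1 }, { 1, 0, 1 },
        { 1, 1, 0 } };
    for (int iteration = 0; iteration < 20000; ++iteration) {
      ann.trainOnline(new DenseDoubleVector(
          instances[iteration % instances.length]));
    }

    // query the trained model with feature-only vectors
    for (double[] instance : instances) {
      DoubleVector features = new DenseDoubleVector(new double[] {
          instance[0], instance[1] });
      System.out.printf("%.0f XOR %.0f -> %.3f%n", instance[0], instance[1],
          ann.getOutput(features).get(0));
    }
  }
}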