com.skp.experiment.math.als.hadoop.DistributedImplicitFeedbackAlternatingLeastSquaresSolver.java Source code

Java tutorial

Introduction

Here is the source code for com.skp.experiment.math.als.hadoop.DistributedImplicitFeedbackAlternatingLeastSquaresSolver.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.skp.experiment.math.als.hadoop;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.QRDecomposition;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.map.OpenIntObjectHashMap;

import com.google.common.base.Preconditions;

/**
 * Solves one row of the alternating-least-squares update for implicit feedback data, see
 * <a href="http://research.yahoo.com/pub/2433">Collaborative Filtering for Implicit Feedback Datasets</a>.
 *
 * <p>For a ratings vector {@code r} the solver computes
 * {@code (Y'Y + Y'(Cu - I)Y + lambda * I)^-1 * Y' Cu p(u)}, where {@code Y'Y} is supplied
 * precomputed, the rows of {@code Y} are read on demand from {@link MapFile.Reader}s, and
 * {@code Cu} holds {@link #confidence(double)} of every non-zero entry of {@code r}.
 *
 * <p>Rows of {@code Y} are cached in memory; the cache is dropped when heap usage gets high.
 * Instances keep mutable state without any synchronization.
 */
public class DistributedImplicitFeedbackAlternatingLeastSquaresSolver {
    /** Fraction of the heap limit above which the row cache is discarded. */
    private static final double MAX_CACHE_RATIO = 0.95;

    private final int numRows;
    private final int numFeatures;
    private final double alpha;
    private final double lambda;

    /** Precomputed {@code Y'Y}, handed in by the caller. */
    private final Matrix YtransposeY;
    /** Single reader used when no partitioned readers have been set. */
    private final MapFile.Reader reader;
    /** In-memory cache of rows of {@code Y}, keyed by row index. */
    private final OpenIntObjectHashMap<Vector> sparseY;
    /** Optional partitioned readers; a row is looked up under key {@code index % size}. */
    private Map<Integer, MapFile.Reader> mapFileReaders;

    /**
     * @param numRows     initial capacity of the row cache
     * @param numFeatures number of latent features; dimension of the solved vector
     * @param lambda      value added to the diagonal of the system matrix
     * @param alpha       scaling used by {@link #confidence(double)}
     * @param reader      reader for the rows of {@code Y}; used for lookups as long as no
     *                    partitioned readers are supplied via {@link #setMapFileReaders(Map)}
     * @param YtransposeY precomputed {@code Y'Y}
     */
    public DistributedImplicitFeedbackAlternatingLeastSquaresSolver(int numRows, int numFeatures, double lambda,
            double alpha, MapFile.Reader reader, Matrix YtransposeY) {
        this.numRows = numRows;
        this.numFeatures = numFeatures;
        this.lambda = lambda;
        this.alpha = alpha;
        this.YtransposeY = YtransposeY;
        this.reader = reader;
        this.sparseY = new OpenIntObjectHashMap<Vector>(this.numRows);
    }

    /**
     * Supplies partitioned readers. When set and non-empty they take precedence over the reader
     * given to the constructor; row {@code i} is read from the reader stored under key
     * {@code i % mapFileReaders.size()}.
     */
    public void setMapFileReaders(Map<Integer, MapFile.Reader> mapFileReaders) {
        this.mapFileReaders = mapFileReaders;
    }

    /** Solves {@code A x = y} by QR decomposition and returns the first column of the solution. */
    private static Vector solve(Matrix A, Matrix y) {
        return new QRDecomposition(A).solve(y).viewColumn(0);
    }

    /** Confidence assigned to an observed rating: {@code 1 + alpha * rating}. */
    protected double confidence(double rating) {
        return 1 + alpha * rating;
    }

    /**
     * Tells whether heap usage exceeds {@link #MAX_CACHE_RATIO} of the heap limit.
     *
     * <p>The limit is {@code maxMemory()} rather than {@code totalMemory()}: the latter is only
     * the currently reserved heap and may still grow, which would flush the cache prematurely.
     * When the JVM reports no limit ({@code Long.MAX_VALUE}) the currently reserved heap is used.
     */
    private boolean needReset() {
        Runtime runtime = Runtime.getRuntime();
        long reserved = runtime.totalMemory();
        long used = reserved - runtime.freeMemory();
        long limit = runtime.maxMemory();
        long capacity = (limit == Long.MAX_VALUE) ? reserved : limit;
        return (double) used / capacity > MAX_CACHE_RATIO;
    }

    /** Key of the partitioned reader for a row; only valid while partitioned readers are set. */
    private int getPartition(int index) {
        return index % mapFileReaders.size();
    }

    /**
     * Reads row {@code index} of {@code Y} from the backing MapFile.
     *
     * @throws IOException if no reader is available for the row or the row is not in the MapFile
     */
    private Vector retrieveRow(int index) throws IOException {
        MapFile.Reader source;
        if (mapFileReaders == null || mapFileReaders.isEmpty()) {
            // No partitioned readers configured: use the reader from the constructor.
            source = reader;
        } else {
            source = mapFileReaders.get(getPartition(index));
        }
        if (source == null) {
            throw new IOException("no MapFile reader available for row " + index);
        }

        IntWritable rowIDWritable = new IntWritable(index);
        VectorWritable colWritable = new VectorWritable();
        if (source.get(rowIDWritable, colWritable) == null) {
            throw new IOException("find " + index + " in MapFile failed!");
        }
        return colWritable.get();
    }

    /**
     * Returns row {@code index} of {@code Y}, from the cache if present, otherwise from the
     * MapFile (and caches it). The cache is cleared first when memory is tight, so callers must
     * keep using the returned vector instead of re-reading the cache later.
     */
    private Vector getMatrixRow(int index) throws IOException {
        if (needReset()) {
            sparseY.clear();
        }
        Vector row = sparseY.get(index);
        if (row == null) {
            row = retrieveRow(index);
            sparseY.put(index, row);
        }
        return row;
    }

    /**
     * Computes the feature vector for one ratings vector.
     *
     * @param userRatings ratings; must be sequential-access, only non-zero entries are used
     * @return solution of {@code (Y'Y + Y'(Cu - I)Y + lambda * I) x = Y' Cu p(u)}
     * @throws IllegalArgumentException if {@code userRatings} is not sequential-access
     * @throws IOException if a needed row of {@code Y} cannot be read
     */
    public Vector solve(Vector userRatings) throws IOException {
        Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

        /* Y' Cu p(u) */
        Vector YtransposeCuPu = new DenseVector(numFeatures);
        /* Y' (Cu - I) Y, accumulated by outer products */
        Matrix YtransposeCuMinusIY = new DenseMatrix(numFeatures, numFeatures);

        Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
        while (ratings.hasNext()) {
            Vector.Element e = ratings.next();
            // Hold the row locally: the cache may be cleared by a later lookup.
            Vector row = getMatrixRow(e.index());
            double weight = confidence(e.get());

            YtransposeCuPu.assign(row.times(weight), Functions.PLUS);

            /* (Cu - I) Y for this row */
            Vector scaledRow = row.times(weight - 1);
            for (Vector.Element feature : row) {
                Vector partial = scaledRow.times(feature.get());
                YtransposeCuMinusIY.viewRow(feature.index()).assign(partial, Functions.PLUS);
            }
        }

        /* Y' (Cu - I) Y + lambda * I : add lambda on the diagonal */
        for (int feature = 0; feature < numFeatures; feature++) {
            YtransposeCuMinusIY.setQuick(feature, feature,
                    YtransposeCuMinusIY.getQuick(feature, feature) + lambda);
        }

        return solve(YtransposeY.plus(YtransposeCuMinusIY), columnVectorAsMatrix(YtransposeCuPu));
    }

    /** Copies {@code v} into a {@code numFeatures x 1} dense matrix. */
    private Matrix columnVectorAsMatrix(Vector v) {
        Matrix matrix = new DenseMatrix(numFeatures, 1);
        for (Vector.Element e : v) {
            matrix.setQuick(e.index(), 0, e.get());
        }
        return matrix;
    }

}