nl.gridline.zieook.inx.movielens.items.ItemBasedSortSimilaritiesMapper.java Source code

Java tutorial

Introduction

Here is the source code for nl.gridline.zieook.inx.movielens.items.ItemBasedSortSimilaritiesMapper.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package nl.gridline.zieook.inx.movielens.items;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.VarIntWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.Vector.Element;
import org.apache.mahout.math.VectorWritable;

/**
 * Maps a row of the similarity matrix to a sorted array of recommendations.
 * <p />
 * The input row is stored in a sparse vector. Processing steps:
 * <ol>
 * <li>the self similarity (the entry at the row's own index) is replaced by
 * {@link Double#NEGATIVE_INFINITY} so it ends up at the tail of the sorted result;</li>
 * <li>the maximum index occurring in the sparse vector is determined
 * (quite often this maximum index is the same for all row vectors);</li>
 * <li>the row is copied into a recommendations array of length {@code maxIndex + 1}, so that
 * every index from {@code 0} up to and including the maximum index has an entry;</li>
 * <li>the recommendations array is sorted by descending similarity value;</li>
 * <li>the sorted array is written to the context under the row key.</li>
 * </ol>
 * <p />
 * Project zieook-movielens<br />
 * ItemBased_SortSimilaritiesMapper.java created 4 mrt. 2011
 * <p />
 * Copyright, all rights reserved 2011 GridLine Amsterdam
 * @author <a href="mailto:gerlof@gridline.nl">Gerlof</a>
 * @version $Revision:$, $Date:$
 */
public final class ItemBasedSortSimilaritiesMapper
        extends Mapper<IntWritable, VectorWritable, VarIntWritable, RecommendationElementArray> {

    /** Stateless comparator, shared by all {@code map} invocations instead of being re-created per row. */
    private static final Comparator<RecommendationElement> BY_SIMILARITY_DESCENDING = new SimilarityComparator();

    /**
     * Converts one similarity matrix row into a recommendation array sorted by descending
     * similarity and writes it to the context.
     * <p />
     * Note that the vector held by {@code value} is modified in place: the entry at index
     * {@code key.get()} is overwritten with {@link Double#NEGATIVE_INFINITY}.
     *
     * @param key index of the row; the entry at this index is treated as the self similarity
     * @param value the similarity matrix row
     * @param context receives a single (row index, sorted recommendations) pair; nothing is
     *        written when the largest non-zero index of the row is not greater than 0
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(IntWritable key, VectorWritable value, Context context)
            throws IOException, InterruptedException {
        Vector similarityMatrixRow = value.get();

        // remove self similarity: negative infinity sorts behind every real similarity value
        similarityMatrixRow.set(key.get(), Double.NEGATIVE_INFINITY);

        // determine the maximum index (== item id) among the non-zero entries
        int maxIndex = -1;
        Iterator<Element> it = similarityMatrixRow.iterateNonZero();
        while (it.hasNext()) {
            maxIndex = Math.max(maxIndex, it.next().index());
        }

        // Rows without any non-zero index above 0 are not emitted (unchanged guard); presumably
        // such a row holds nothing but the self similarity of item 0.
        if (maxIndex <= 0) {
            return;
        }

        // maxIndex is itself a valid index, so maxIndex + 1 slots are needed; sizing the array
        // with maxIndex would silently drop the entry of the highest-numbered item.
        RecommendationElement[] itemBasedRecommendations = new RecommendationElement[maxIndex + 1];

        for (int i = 0; i <= maxIndex; i++) {
            Element element = similarityMatrixRow.getElement(i);

            // defensive default in case the vector implementation returns no element for an index
            double similarityValue = Double.NEGATIVE_INFINITY;
            if (element != null) {
                similarityValue = element.get();
            }

            itemBasedRecommendations[i] = new RecommendationElement(i, similarityValue);
        }

        // Arrays.sort on objects is stable: equal similarities keep their ascending index order
        Arrays.sort(itemBasedRecommendations, BY_SIMILARITY_DESCENDING);

        context.write(new VarIntWritable(key.get()), new RecommendationElementArray(itemBasedRecommendations));
    }

    /**
     * Orders recommendations by descending similarity value.
     * <p />
     * The comparison is strict. An epsilon-tolerant "equality" is not transitive and yields
     * contradictory results for infinite or NaN values, which violates the {@link Comparator}
     * contract and can make {@link Arrays#sort(Object[], Comparator)} fail or return an
     * inconsistent order. NaN is ranked like {@link Double#NEGATIVE_INFINITY}, i.e. last.
     */
    private static class SimilarityComparator implements Comparator<RecommendationElement> {

        @Override
        public int compare(RecommendationElement recommendation1, RecommendationElement recommendation2) {
            final double value1 = sortKey(recommendation1.getSimilarityValue());
            final double value2 = sortKey(recommendation2.getSimilarityValue());

            if (value1 > value2) {
                return -1;
            }
            if (value1 < value2) {
                return 1;
            }
            return 0;
        }

        /** Maps NaN to negative infinity so that every value takes part in a consistent ordering. */
        private static double sortKey(double similarityValue) {
            return Double.isNaN(similarityValue) ? Double.NEGATIVE_INFINITY : similarityValue;
        }

    }

}