nl.gridline.zieook.statistics.popularity.PopularityMap.java Source code

Java tutorial

Introduction

Here is the source code for nl.gridline.zieook.statistics.popularity.PopularityMap.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package nl.gridline.zieook.statistics.popularity;

import java.io.IOException;
import java.util.Collection;
import java.util.Map;

import nl.gridline.zieook.mapreduce.HBaseTableConstants;
import nl.gridline.zieook.mapreduce.RowKeys;
import nl.gridline.zieook.model.ModelConstants;
import nl.gridline.zieook.statistics.popularity.io.ItemTime;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;

/**
 * Mapper that counts ratings per item within a set of configured time ranges.
 * <p />
 * For every rating row it receives, it emits a count of one keyed by
 * (item, collection, time range) for each configured range in which the
 * rating's timestamp falls. A range is the open interval
 * {@code (now - range, now)}; {@code now} and the ranges are read from the job
 * configuration in {@link #setup(Context)}.
 * <p />
 * Project zieook-movielens<br />
 * PopularityMap.java created 13 dec. 2011
 * <p />
 * Copyright, all rights reserved 2011 GridLine Amsterdam
 * @author <a href="mailto:job@gridline.nl">Job</a>
 * @version $Revision:$, $Date:$
 */
public class PopularityMap extends TableMapper<ItemTime, LongWritable> {
    private static final Log LOG = LogFactory.getLog(PopularityMap.class);

    // find ratings per item within a given time range
    private static final byte[] COLUMN_RATING = Bytes.toBytes(HBaseTableConstants.USERTABLE_COLUMN_RATING);

    // Output key and value are reused for every write instead of allocating new ones per record.
    private final ItemTime ratedItem = new ItemTime();
    private final LongWritable one = new LongWritable(1);

    /** The configured time ranges (the key set of {@link #typeMap}); each is subtracted from {@link #now}. */
    private Collection<Long> types;
    /** Time range mapped to its name, as parsed by {@code PopularityStatistics.getTypes}. */
    private Map<Long, String> typeMap;
    /** Upper bound (exclusive) of every time range, read from the job configuration. */
    private long now;

    /**
     * Reads the time ranges and the reference time {@code now} from the job configuration.
     *
     * @param context the mapper context supplying the job configuration
     * @throws IOException if {@code PopularityStatistics.TYPES} or {@code PopularityStatistics.NOW} is not set
     * @throws InterruptedException declared by the overridden method; not thrown here
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {

        final String typesConfig = context.getConfiguration().get(PopularityStatistics.TYPES);
        if (typesConfig == null) {
            throw new IOException(
                    "failed to read <" + PopularityStatistics.TYPES + "> please set the configuration item");
        }

        // Long.MIN_VALUE is used as the "not configured" sentinel.
        now = context.getConfiguration().getLong(PopularityStatistics.NOW, Long.MIN_VALUE);
        if (now == Long.MIN_VALUE) {
            throw new IOException(
                    "failed to read <" + PopularityStatistics.NOW + "> please set the configuration item");
        }

        typeMap = PopularityStatistics.getTypes(typesConfig);
        types = typeMap.keySet();

        LOG.info("setting now to: " + now);
        LOG.info("types: '" + typesConfig + "' split into " + types.size() + " items");
        for (Map.Entry<Long, String> entry : typeMap.entrySet()) {
            LOG.info(entry.getValue() + " <" + (now - entry.getKey()) + "," + now + ">");
        }
    }

    /**
     * Emits {@code (item, collection, range) -> 1} for every configured time range that contains
     * the rating's timestamp. Rows lacking the item, the timestamp or a resolvable collection are
     * logged and skipped.
     *
     * @param key the row key as passed in by the framework (unused; the row is read from {@code value})
     * @param value the rating row
     * @param context the mapper context to write to
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {

        long item = 0;
        long date = 0;

        final KeyValue itemKV = value.getColumnLatest(COLUMN_RATING, ModelConstants.ITEM);
        if (itemKV != null) {
            item = Bytes.toLong(itemKV.getValue());
        } else {
            LOG.error("no key-value " + Bytes.toString(ModelConstants.ITEM) + " for "
                    + Bytes.toStringBinary(value.getRow()));
        }

        final KeyValue dateKV = value.getColumnLatest(COLUMN_RATING, ModelConstants.TIMESTAMP);
        if (dateKV != null) {
            date = Bytes.toLong(dateKV.getValue());
        } else {
            LOG.error("no key-value " + Bytes.toString(ModelConstants.TIMESTAMP) + " for "
                    + Bytes.toStringBinary(value.getRow()));
        }

        final String collection = resolveCollection(value);

        // remark, we check for null-ness of the collection string, as this can be obtained through the fallback:
        if (itemKV != null && dateKV != null && collection != null) {
            for (long type : types) {
                final long earliest = now - type;
                // both bounds are exclusive
                if (date > earliest && date < now) {
                    // write ratings with: itemid-timerange
                    context.write(ratedItem.set(item, collection, type), one);
                }
            }
        }
        // report progress even when nothing was written for this row
        context.progress();
    }

    /**
     * Determines the collection a rating row belongs to: the stored collection column if present,
     * otherwise the second component of the decomposed row key.
     *
     * @param value the rating row
     * @return the collection name, or {@code null} if it could not be determined
     */
    private static String resolveCollection(Result value) {
        final KeyValue collectionKV = value.getColumnLatest(COLUMN_RATING, ModelConstants.COLLECTION);
        if (collectionKV != null) {
            return Bytes.toString(collectionKV.getValue());
        }

        // previously we didn't store the collection in separate field.
        // but we can retrieve it using the rowkey
        // this is a fallback:
        final Object[] parts = RowKeys.getStrStrLngLng(value.getRow());

        // NOTE(review): whether getStrStrLngLng can return null, a short array or null elements is
        // not visible from here; guard so that a bad row is logged and skipped instead of failing the task.
        String collection = null;
        if (parts != null && parts.length > 1 && parts[1] != null) {
            collection = parts[1].toString();
        }

        if (collection != null) {
            LOG.warn("no key-value " + Bytes.toString(ModelConstants.COLLECTION)
                    + " using decomposed row key fall-back: " + collection);
        } else {
            LOG.error("could not determine " + Bytes.toString(ModelConstants.COLLECTION) + " for "
                    + Bytes.toStringBinary(value.getRow()));
        }
        return collection;
    }
}