Source code for org.apache.hama.computemodel.mapreduce.ShuffleAndDistribute.java

Java tutorial

Introduction

This page shows the full source code of the class ShuffleAndDistribute, part of the Apache Hama MapReduce compute model (package org.apache.hama.computemodel.mapreduce).

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hama.computemodel.mapreduce;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.PriorityQueue;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.lib.HashPartitioner;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hama.bsp.BSPPeer;
import org.apache.hama.bsp.Superstep;
import org.apache.hama.bsp.message.DiskQueue;
import org.apache.hama.bsp.sync.SyncException;
import org.apache.hama.computemodel.mapreduce.ReducerKeyDesignator.DesignateStrategy;
import org.apache.hama.computemodel.mapreduce.ReducerKeyDesignator.KeyDistribution;

/**
 * Superstep of the Hama MapReduce compute model that shuffles map output to
 * the peers responsible for reducing each partition.
 *
 * <p>It first folds the per-(peer, partition) key counts received as messages
 * into the global key-distribution matrix, asks a {@link ReducerKeyDesignator}
 * to assign each partition to a destination peer, and then sends every locally
 * buffered record whose partition belongs to another peer to that peer.
 * Records that stay local remain in the in-memory queue for the reduce phase.
 */
public class ShuffleAndDistribute<K2 extends WritableComparable<?>, V2 extends Writable> extends
        Superstep<NullWritable, NullWritable, K2, V2, WritableKeyValues<? extends WritableComparable<?>, ? extends Writable>> {

    /**
     * Configuration key naming the {@link ReducerKeyDesignator} implementation.
     * Previously the lookup used the empty string as the key, so a
     * user-supplied designator class could never take effect.
     */
    public static final String KEY_DESIGNATOR_CLASS = "hama.mapreduce.reducer.key.designator.class";

    private static final Log LOG = LogFactory.getLog(ShuffleAndDistribute.class);

    /** Rows index peers, columns index partitions: key counts per (peer, partition). */
    private long[][] globalKeyDistribution;
    /** Map-side output records buffered in memory, ordered by key. */
    private PriorityQueue<WritableKeyValues<K2, V2>> memoryQueue;

    @Override
    protected void setup(
            BSPPeer<NullWritable, NullWritable, K2, V2, WritableKeyValues<? extends WritableComparable<?>, ? extends Writable>> peer) {
        super.setup(peer);
    }

    /**
     * Assigns every partition to a reducer peer.
     *
     * @param keyDistribution output array; after the call,
     *        {@code keyDistribution[partition]} holds the id of the peer
     *        designated to reduce that partition
     * @param globalKeyDistribution key counts per (peer, partition), used by the
     *        designator to balance or co-locate work
     * @param conf job configuration; may name a custom designator class under
     *        {@link #KEY_DESIGNATOR_CLASS}
     */
    protected void designateKeysToReducers(int[] keyDistribution, final long[][] globalKeyDistribution,
            Configuration conf) {
        // Fixed: the class lookup previously used "" as the configuration key,
        // which silently ignored any configured designator.
        Class<?> designatorClass = conf.getClass(KEY_DESIGNATOR_CLASS, null);
        ReducerKeyDesignator designator;
        if (designatorClass == null) {
            // Default strategy when the job does not configure one.
            designator = ReducerKeyDesignator.getReduceDesignator(DesignateStrategy.MINIMIZE_COMMUNICATION,
                    KeyDistribution.CONTIGUOUS);
        } else {
            designator = (ReducerKeyDesignator) ReflectionUtils.newInstance(designatorClass, conf);
        }

        designator.designateKeysToReducers(keyDistribution, globalKeyDistribution, conf);
    }

    @Override
    @SuppressWarnings("unchecked")
    protected void compute(
            BSPPeer<NullWritable, NullWritable, K2, V2, WritableKeyValues<? extends WritableComparable<?>, ? extends Writable>> peer)
            throws IOException {
        int peerId = peer.getPeerId();
        Configuration conf = peer.getConfiguration();

        // State saved by the preceding map superstep.
        this.memoryQueue = (PriorityQueue<WritableKeyValues<K2, V2>>) peer.getSavedObject(Mapper.MESSAGE_QUEUE);
        this.globalKeyDistribution = (long[][]) peer.getSavedObject(Mapper.KEY_DIST);

        // Fold the (peerNo, partition) -> count messages from the other peers
        // into the global key-distribution matrix.
        WritableKeyValues<WritableKeyValues<IntWritable, IntWritable>, LongWritable> message;
        while ((message = (WritableKeyValues<WritableKeyValues<IntWritable, IntWritable>, LongWritable>) peer
                .getCurrentMessage()) != null) {
            int peerNo = message.getKey().getKey().get();
            int partition = message.getKey().getValue().get();
            globalKeyDistribution[peerNo][partition] += message.getValue().get();
        }

        // keyDistribution[partition] = id of the peer that will reduce it.
        int[] keyDistribution = new int[globalKeyDistribution[0].length];
        designateKeysToReducers(keyDistribution, globalKeyDistribution, conf);

        // Removed: a dead merge queue was allocated here from the local key
        // count; it was never used, and a zero count made the
        // PriorityQueue(int) constructor throw IllegalArgumentException.

        Partitioner<K2, V2> partitioner = (Partitioner<K2, V2>) ReflectionUtils
                .newInstance(conf.getClass(Mapper.PARTITIONER_CLASS, HashPartitioner.class), conf);

        // Ship each record whose partition was designated to another peer;
        // records staying on this peer remain in memoryQueue for the reduce phase.
        Iterator<WritableKeyValues<K2, V2>> keyValIter = this.memoryQueue.iterator();
        String[] peerNames = peer.getAllPeerNames();
        while (keyValIter.hasNext()) {
            WritableKeyValues<K2, V2> record = keyValIter.next();
            // TODO: partition by the number of reducers once it can differ
            // from the peer count.
            int partition = partitioner.getPartition(record.getKey(), record.getValue(), peer.getNumPeers());
            int destPeerId = keyDistribution[partition];
            if (peerId != destPeerId) {
                peer.send(peerNames[destPeerId], record);
                keyValIter.remove(); // Iterator.remove: safe removal mid-iteration.
            }
        }
    }
}