com.kylinolap.job.hadoop.cube.RangeKeyDistributionReducer.java Source code

Introduction

Here is the source code for com.kylinolap.job.hadoop.cube.RangeKeyDistributionReducer.java, a Hadoop reducer from the KylinOLAP (now Apache Kylin) code base. It scans row keys in sorted order, records one key for roughly every gigabyte of data seen, and in cleanup emits a subset of those keys as region split points, sized according to the cube's declared capacity.

Source

/*
 * Copyright 2013-2014 eBay Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.kylinolap.job.hadoop.cube;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.StringUtils;

import com.kylinolap.common.mr.KylinReducer;
import com.kylinolap.job.constant.BatchConstants;
import com.kylinolap.metadata.model.cube.CubeDesc.CubeCapacity;

/**
 * Reducer that turns per-key size statistics into region split keys: it
 * records one "gigabyte point" for roughly every gigabyte of input and,
 * in cleanup, emits every gbPerRegion-th point as a split key.
 *
 * @author ysong1
 */
public class RangeKeyDistributionReducer extends KylinReducer<Text, LongWritable, Text, LongWritable> {

    public static final long ONE_GIGA_BYTES = 1024L * 1024L * 1024L;
    public static final int SMALL_CUT = 10; //  10 GB per region
    public static final int MEDIUM_CUT = 20; //  20 GB per region
    public static final int LARGE_CUT = 100; // 100 GB per region

    public static final int MAX_REGION = 200; // hard upper bound on the region count

    private LongWritable outputValue = new LongWritable(0);

    private int cut; // target GB per region, chosen from the cube capacity in setup()
    private long bytesRead = 0; // bytes accumulated since the last gigabyte point
    private List<Text> gbPoints = new ArrayList<Text>(); // one row key per ~1 GB of input

    @Override
    protected void setup(Context context) throws IOException {
        super.publishConfiguration(context.getConfiguration());

        // Choose the per-region size cut based on the cube's declared capacity.
        CubeCapacity cubeCapacity = CubeCapacity
                .valueOf(context.getConfiguration().get(BatchConstants.CUBE_CAPACITY));
        switch (cubeCapacity) {
        case SMALL:
            cut = SMALL_CUT;
            break;
        case MEDIUM:
            cut = MEDIUM_CUT;
            break;
        case LARGE:
            cut = LARGE_CUT;
            break;
        }
    }

    @Override
    public void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
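        // Sum the byte counts reported for this key. Keys arrive in sorted
        // order from the shuffle, so bytesRead is a running total along the
        // key space.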
        for (LongWritable v : values) {
            bytesRead += v.get();
        }

        // Roughly every gigabyte, remember the current key as a candidate
        // split point.
        if (bytesRead >= ONE_GIGA_BYTES) {
            gbPoints.add(new Text(key));
            bytesRead = 0; // reset bytesRead
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
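        // Each gigabyte point stands for ~1 GB, so gbPoints.size() approximates
        // the total data size in GB; aim for 'cut' GB per region, clamped to
        // the range [1, MAX_REGION].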
        int nRegion = Math.round((float) gbPoints.size() / (float) cut);
        nRegion = Math.max(1, nRegion);
        nRegion = Math.min(MAX_REGION, nRegion);
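
        // e.g. a MEDIUM cube (cut = 20) with 45 gigabyte points:
        // round(45f / 20f) = 2 regions, 45 / 2 = 22 GB per region.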

        int gbPerRegion = gbPoints.size() / nRegion;
        gbPerRegion = Math.max(1, gbPerRegion);

        System.out.println(nRegion + " regions");
        System.out.println(gbPerRegion + " GB per region");

        // Emit every gbPerRegion-th gigabyte point as a region split key,
        // logging each key in hex alongside its index.
        for (int i = gbPerRegion; i < gbPoints.size(); i += gbPerRegion) {
            Text key = gbPoints.get(i);
            outputValue.set(i);
            System.out.println(StringUtils.byteToHexString(key.getBytes()) + "\t" + outputValue.get());
            context.write(key, outputValue);
        }
    }
}
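
For context, here is a minimal sketch of how a reducer like this might be wired into a Hadoop job. It is illustrative only, not part of the Kylin source above: the job name, input/output paths, and the "cube.capacity" configuration key are assumptions (the real key is defined in BatchConstants.CUBE_CAPACITY). Note the single reduce task: bytesRead and gbPoints live in one reducer instance, so the split computation only works when a single task sees the entire sorted key stream.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class RangeKeyDistributionJobSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical key name; the real constant is BatchConstants.CUBE_CAPACITY.
        conf.set("cube.capacity", "MEDIUM");

        Job job = Job.getInstance(conf, "RangeKeyDistribution");
        job.setJarByClass(RangeKeyDistributionReducer.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // No mapper is set, so Hadoop's default identity mapper forwards the
        // (Text, LongWritable) pairs from the input sequence file unchanged.
        job.setReducerClass(RangeKeyDistributionReducer.class);
        // One reducer only: the split points are derived from a running byte
        // count over the globally sorted key stream.
        job.setNumReduceTasks(1);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Output written this way, one split key per record in the sequence file, could then feed a table-creation step that pre-splits HBase regions; that consumer is outside this listing.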