Java tutorial
/*
 * Copyright 2013-2014 eBay Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.kylinolap.job.hadoop.cube;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

import com.google.common.collect.Lists;
import com.kylinolap.common.KylinConfig;
import com.kylinolap.common.mr.KylinMapper;
import com.kylinolap.cube.CubeManager;
import com.kylinolap.cube.kv.RowConstants;
import com.kylinolap.cube.measure.MeasureCodec;
import com.kylinolap.job.constant.BatchConstants;
import com.kylinolap.job.hadoop.AbstractHadoopJob;
import com.kylinolap.metadata.model.cube.CubeDesc;
import com.kylinolap.metadata.model.cube.HBaseColumnDesc;
import com.kylinolap.metadata.model.cube.HBaseColumnFamilyDesc;
import com.kylinolap.metadata.model.cube.MeasureDesc;

/**
 * Mapper that emits, for every input (key, value) pair, one {@link KeyValue}
 * per HBase column declared in the cube's HBase mapping. The input key bytes
 * are reused as the output row key.
 *
 * @author George Song (ysong1)
 */
public class CubeHFileMapper extends KylinMapper<Text, Text, ImmutableBytesWritable, KeyValue> {

    // Reused for every record; reset to the current input key inside map().
    ImmutableBytesWritable outputKey = new ImmutableBytesWritable();

    String cubeName;
    CubeDesc cubeDesc;

    // Codec over all cube measures, used to decode the incoming value.
    MeasureCodec inputCodec;
    // Scratch array the decoded measures are written into (one slot per cube measure).
    Object[] inputMeasures;
    // One creator per HBase column, in column-family / column iteration order.
    List<KeyValueCreator> keyValueCreators;

    /**
     * Loads the cube descriptor named by {@link BatchConstants#CFG_CUBE_NAME}
     * and prepares the input codec plus one {@link KeyValueCreator} for each
     * column of each column family in the cube's HBase mapping.
     *
     * @param context the mapper context supplying the job configuration
     * @throws IOException declared by the overridden method
     */
    @Override
    protected void setup(Context context) throws IOException {
        super.publishConfiguration(context.getConfiguration());

        cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);

        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
        CubeManager cubeMgr = CubeManager.getInstance(config);
        cubeDesc = cubeMgr.getCube(cubeName).getDescriptor();

        inputCodec = new MeasureCodec(cubeDesc.getMeasures());
        inputMeasures = new Object[cubeDesc.getMeasures().size()];

        keyValueCreators = Lists.newArrayList();
        for (HBaseColumnFamilyDesc family : cubeDesc.getHBaseMapping().getColumnFamily()) {
            for (HBaseColumnDesc column : family.getColumns()) {
                keyValueCreators.add(new KeyValueCreator(cubeDesc, column));
            }
        }
    }

    /**
     * Writes one {@link KeyValue} per configured HBase column for the given
     * record.
     *
     * @param key     input row key; its bytes become the output key
     * @param value   encoded measures of the record
     * @param context sink for the produced (row key, KeyValue) pairs
     */
    @Override
    public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
        outputKey.set(key.getBytes(), 0, key.getLength());

        // Shortcut for simple full copy: a single column that references every
        // measure in order can take the input value bytes as they are.
        if (keyValueCreators.size() == 1 && keyValueCreators.get(0).isFullCopy) {
            KeyValueCreator soleCreator = keyValueCreators.get(0);
            context.write(outputKey, soleCreator.create(key, value.getBytes(), 0, value.getLength()));
            return;
        }

        // Normal (complex) case that distributes measures to multiple HBase columns.
        inputCodec.decode(value, inputMeasures);
        for (KeyValueCreator creator : keyValueCreators) {
            context.write(outputKey, creator.create(key, inputMeasures));
        }
    }

    /**
     * Builds the {@link KeyValue} of one HBase column from the measures that
     * column references.
     */
    class KeyValueCreator {
        byte[] cfBytes;
        byte[] qBytes;
        // Taken once, when this creator is constructed, and used for every KeyValue it builds.
        long timestamp;

        // refIndex[i] is the position, within the cube's measure list, of the i-th referenced measure.
        int[] refIndex;
        MeasureDesc[] refMeasures;

        MeasureCodec codec;
        Object[] colValues;
        ByteBuffer valueBuf = ByteBuffer.allocate(RowConstants.ROWVALUE_BUFFER_SIZE);

        // True when this column references every cube measure, in the cube's own order.
        boolean isFullCopy;

        /**
         * @param cubeDesc descriptor supplying the full, ordered measure list
         * @param colDesc  HBase column whose measure references are resolved
         * @throws IllegalArgumentException if a referenced measure name is not
         *                                  found among the cube's measures
         */
        public KeyValueCreator(CubeDesc cubeDesc, HBaseColumnDesc colDesc) {
            cfBytes = Bytes.toBytes(colDesc.getColumnFamilyName());
            qBytes = Bytes.toBytes(colDesc.getQualifier());
            timestamp = System.currentTimeMillis();

            List<MeasureDesc> measures = cubeDesc.getMeasures();
            String[] measureNames = getMeasureNames(cubeDesc);
            String[] refs = colDesc.getMeasureRefs();

            refIndex = new int[refs.length];
            refMeasures = new MeasureDesc[refs.length];
            for (int r = 0; r < refs.length; r++) {
                int position = indexOf(measureNames, refs[r]);
                refIndex[r] = position;
                refMeasures[r] = measures.get(position);
            }

            codec = new MeasureCodec(refMeasures);
            colValues = new Object[refs.length];

            isFullCopy = refsMirrorAllMeasures(measures.size());
        }

        /**
         * Picks this column's measures out of {@code measureValues}, encodes
         * them into the internal buffer and wraps the encoded bytes in a
         * {@link KeyValue}.
         *
         * @param key           row key
         * @param measureValues values of all cube measures, indexed by their
         *                      position in the cube's measure list
         * @return the KeyValue for this column
         */
        public KeyValue create(Text key, Object[] measureValues) {
            for (int c = 0; c < colValues.length; c++) {
                colValues[c] = measureValues[refIndex[c]];
            }

            valueBuf.clear();
            codec.encode(colValues, valueBuf);

            // Bytes [0, position) of the backing array hold what was just encoded.
            return create(key, valueBuf.array(), 0, valueBuf.position());
        }

        /**
         * Wraps an already encoded value in a {@code Put}-type {@link KeyValue}
         * for this column's family and qualifier.
         *
         * @param key     row key
         * @param value   array containing the encoded value
         * @param voffset offset of the value within {@code value}
         * @param vlen    length of the value in bytes
         * @return the KeyValue for this column
         */
        public KeyValue create(Text key, byte[] value, int voffset, int vlen) {
            return new KeyValue(
                    key.getBytes(), 0, key.getLength(),
                    cfBytes, 0, cfBytes.length,
                    qBytes, 0, qBytes.length,
                    timestamp, Type.Put,
                    value, voffset, vlen);
        }

        /**
         * Tells whether {@link #refIndex} has an entry for each of the first
         * {@code measureCount} positions and that entry equals its own position.
         */
        private boolean refsMirrorAllMeasures(int measureCount) {
            if (refIndex.length < measureCount) {
                return false;
            }
            for (int m = 0; m < measureCount; m++) {
                if (refIndex[m] != m) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Case-insensitive lookup of {@code ref} in {@code measureNames}.
         *
         * @return index of the first matching name
         * @throws IllegalArgumentException if no name matches
         */
        private int indexOf(String[] measureNames, String ref) {
            int pos = 0;
            while (pos < measureNames.length) {
                if (measureNames[pos].equalsIgnoreCase(ref)) {
                    return pos;
                }
                pos++;
            }
            throw new IllegalArgumentException(
                    "Measure '" + ref + "' not found in " + Arrays.toString(measureNames));
        }

        /** Returns the names of the cube's measures, in measure-list order. */
        private String[] getMeasureNames(CubeDesc cubeDesc) {
            List<MeasureDesc> measures = cubeDesc.getMeasures();
            String[] names = new String[measures.size()];
            int slot = 0;
            for (MeasureDesc measure : measures) {
                names[slot++] = measure.getName();
            }
            return names;
        }
    }
}