List of usage examples for org.apache.hadoop.io.Text#getBytes()
@Override public byte[] getBytes()
From source file:org.apache.kylin.engine.mr.steps.InMemCuboidFromBaseCuboidMapper.java
License:Apache License
/**
 * Builds a single record by concatenating the key (starting at {@code keyOffset})
 * and the value into {@code keyValueBuffer}, then copying the written bytes out.
 *
 * <p>{@code Text.getBytes()} returns the backing array, of which only the first
 * {@code Text.getLength()} bytes are valid. The copy lengths are therefore taken
 * from {@code getLength()} rather than from the array length, so that stale bytes
 * left over in a reused {@code Text} never end up in the record.
 *
 * @param key   the input key; bytes before {@code keyOffset} are skipped
 * @param value the input value, appended after the key bytes
 * @return a {@code ByteArray} wrapping a fresh copy of the concatenated bytes
 */
@Override
protected ByteArray getRecordFromKeyValue(Text key, Text value) {
    keyValueBuffer.clear();

    // Only [0, getLength()) of a Text's backing array holds valid data.
    keyValueBuffer.put(key.getBytes(), keyOffset, key.getLength() - keyOffset);
    keyValueBuffer.put(value.getBytes(), 0, value.getLength());

    // Copy out exactly what was written so the shared buffer can be reused.
    byte[] keyValue = new byte[keyValueBuffer.position()];
    System.arraycopy(keyValueBuffer.array(), 0, keyValue, 0, keyValueBuffer.position());
    return new ByteArray(keyValue);
}
From source file:org.apache.kylin.engine.mr.steps.MergeCuboidMapper.java
License:Apache License
@Override public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException { long cuboidID = rowKeySplitter.split(key.getBytes()); Cuboid cuboid = Cuboid.findById(cubeDesc, cuboidID); RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(cuboid); SplittedBytes[] splittedByteses = rowKeySplitter.getSplitBuffers(); int bufOffset = 0; int bodySplitOffset = rowKeySplitter.getBodySplitOffset(); for (int i = 0; i < cuboid.getColumns().size(); ++i) { int useSplit = i + bodySplitOffset; TblColRef col = cuboid.getColumns().get(i); if (this.checkNeedMerging(col)) { // if dictionary on fact table column, needs rewrite DictionaryManager dictMgr = DictionaryManager.getInstance(config); Dictionary<String> mergedDict = dictMgr.getDictionary(mergedCubeSegment.getDictResPath(col)); Dictionary<String> sourceDict; // handle the column that all records is null if (sourceCubeSegment.getDictionary(col) == null) { BytesUtil.writeUnsigned(mergedDict.nullId(), newKeyBodyBuf, bufOffset, mergedDict.getSizeOfId()); bufOffset += mergedDict.getSizeOfId(); continue; } else { sourceDict = dictMgr.getDictionary(sourceCubeSegment.getDictResPath(col)); }//from w ww . j a va 2 s . 
c om while (sourceDict.getSizeOfValue() > newKeyBodyBuf.length - bufOffset || // mergedDict.getSizeOfValue() > newKeyBodyBuf.length - bufOffset || // mergedDict.getSizeOfId() > newKeyBodyBuf.length - bufOffset) { byte[] oldBuf = newKeyBodyBuf; newKeyBodyBuf = new byte[2 * newKeyBodyBuf.length]; System.arraycopy(oldBuf, 0, newKeyBodyBuf, 0, oldBuf.length); } int idInSourceDict = BytesUtil.readUnsigned(splittedByteses[useSplit].value, 0, splittedByteses[useSplit].length); int idInMergedDict; //int size = sourceDict.getValueBytesFromId(idInSourceDict, newKeyBodyBuf, bufOffset); String v = sourceDict.getValueFromId(idInSourceDict); if (v == null) { idInMergedDict = mergedDict.nullId(); } else { idInMergedDict = mergedDict.getIdFromValue(v); } BytesUtil.writeUnsigned(idInMergedDict, newKeyBodyBuf, bufOffset, mergedDict.getSizeOfId()); bufOffset += mergedDict.getSizeOfId(); } else { // keep as it is while (splittedByteses[useSplit].length > newKeyBodyBuf.length - bufOffset) { byte[] oldBuf = newKeyBodyBuf; newKeyBodyBuf = new byte[2 * newKeyBodyBuf.length]; System.arraycopy(oldBuf, 0, newKeyBodyBuf, 0, oldBuf.length); } System.arraycopy(splittedByteses[useSplit].value, 0, newKeyBodyBuf, bufOffset, splittedByteses[useSplit].length); bufOffset += splittedByteses[useSplit].length; } } int fullKeySize = rowkeyEncoder.getBytesLength(); while (newKeyBuf.array().length < fullKeySize) { newKeyBuf.set(new byte[newKeyBuf.length() * 2]); } newKeyBuf.set(0, fullKeySize); rowkeyEncoder.encode(new ByteArray(newKeyBodyBuf, 0, bufOffset), newKeyBuf); outputKey.set(newKeyBuf.array(), 0, fullKeySize); // re-encode measures if dictionary is used if (dictMeasures.size() > 0) { codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), measureObjs); for (Pair<Integer, MeasureIngester> pair : dictMeasures) { int i = pair.getFirst(); MeasureIngester ingester = pair.getSecond(); measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts, newDicts); } 
ByteBuffer valueBuf = codec.encode(measureObjs); outputValue.set(valueBuf.array(), 0, valueBuf.position()); value = outputValue; } context.write(outputKey, value); }
From source file:org.apache.kylin.engine.mr.steps.NDCuboidMapper.java
License:Apache License
@Override public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException { long cuboidId = rowKeySplitter.split(key.getBytes()); Cuboid parentCuboid = Cuboid.findById(cubeDesc, cuboidId); Collection<Long> myChildren = cuboidScheduler.getSpanningCuboid(cuboidId); // if still empty or null if (myChildren == null || myChildren.size() == 0) { context.getCounter(BatchConstants.MAPREDUCE_COUNTER_GROUP_NAME, "Skipped records").increment(1L); if (skipCounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) { logger.info("Skipping record with ordinal: " + skipCounter); }/*from w ww. ja va2 s . c o m*/ return; } context.getCounter(BatchConstants.MAPREDUCE_COUNTER_GROUP_NAME, "Processed records").increment(1L); if (handleCounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) { logger.info("Handling record with ordinal: " + handleCounter); } for (Long child : myChildren) { Cuboid childCuboid = Cuboid.findById(cubeDesc, child); Pair<Integer, ByteArray> result = ndCuboidBuilder.buildKey(parentCuboid, childCuboid, rowKeySplitter.getSplitBuffers()); outputKey.set(result.getSecond().array(), 0, result.getFirst()); context.write(outputKey, value); } }
From source file:org.apache.kylin.engine.mr.steps.NumberDictionaryForestTest.java
License:Apache License
/**
 * Prints the type id and field value of {@code key} to standard output.
 *
 * <p>The field value is decoded from the key's text, skipping its first byte.
 *
 * @param key the sortable key to print
 * @return the decoded field value
 */
private String printKey(SelfDefineSortableKey key) {
    Text text = key.getText();
    byte[] raw = text.getBytes();
    // Skip the first byte; only getLength() bytes of the backing array are valid.
    int payloadLength = text.getLength() - 1;
    String fieldValue = Bytes.toString(raw, 1, payloadLength);

    String line = "type flag:" + key.getTypeId() + " fieldValue:" + fieldValue;
    System.out.println(line);
    return fieldValue;
}
From source file:org.apache.kylin.engine.mr.steps.NumberDictionaryForestTest.java
License:Apache License
/**
 * Decodes the field value carried by {@code key}, skipping the first byte of its text.
 *
 * @param key the sortable key to read
 * @return the decoded field value
 */
private String getFieldValue(SelfDefineSortableKey key) {
    Text text = key.getText();
    byte[] raw = text.getBytes();
    // Skip the first byte; only getLength() bytes of the backing array are valid.
    int payloadLength = text.getLength() - 1;
    return Bytes.toString(raw, 1, payloadLength);
}
From source file:org.apache.kylin.engine.mr.steps.SegmentReEncoder.java
License:Apache License
/** * Re-encode with both dimension and measure in encoded (Text) format. * @param key//from w ww. j ava 2s . c o m * @param value * @return * @throws IOException */ public Pair<Text, Text> reEncode(Text key, Text value) throws IOException { if (initialized == false) { throw new IllegalStateException("Not initialized"); } Object[] measureObjs = new Object[measureDescs.size()]; // re-encode measures if dictionary is used if (dictMeasures.size() > 0) { codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), measureObjs); for (Pair<Integer, MeasureIngester> pair : dictMeasures) { int i = pair.getFirst(); MeasureIngester ingester = pair.getSecond(); measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts, newDicts); } ByteBuffer valueBuf = codec.encode(measureObjs); byte[] resultValue = new byte[valueBuf.position()]; System.arraycopy(valueBuf.array(), 0, resultValue, 0, valueBuf.position()); return Pair.newPair(processKey(key), new Text(resultValue)); } else { return Pair.newPair(processKey(key), value); } }
From source file:org.apache.kylin.engine.mr.steps.SegmentReEncoder.java
License:Apache License
/** * Re-encode with measures in Object[] format. * @param key//from w w w. j a v a2s .c o m * @param value * @return * @throws IOException */ public Pair<Text, Object[]> reEncode2(Text key, Text value) throws IOException { if (initialized == false) { throw new IllegalStateException("Not initialized"); } Object[] measureObjs = new Object[measureDescs.size()]; codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), measureObjs); // re-encode measures if dictionary is used if (dictMeasures.size() > 0) { for (Pair<Integer, MeasureIngester> pair : dictMeasures) { int i = pair.getFirst(); MeasureIngester ingester = pair.getSecond(); measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts, newDicts); } ByteBuffer valueBuf = codec.encode(measureObjs); byte[] resultValue = new byte[valueBuf.position()]; System.arraycopy(valueBuf.array(), 0, resultValue, 0, valueBuf.position()); } return Pair.newPair(processKey(key), measureObjs); }
From source file:org.apache.kylin.engine.mr.steps.SegmentReEncoder.java
License:Apache License
private Text processKey(Text key) throws IOException { long cuboidID = rowKeySplitter.split(key.getBytes()); Cuboid cuboid = Cuboid.findForMandatory(cubeDesc, cuboidID); RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(cuboid); ByteArray[] splittedByteses = rowKeySplitter.getSplitBuffers(); int bufOffset = 0; int bodySplitOffset = rowKeySplitter.getBodySplitOffset(); for (int i = 0; i < cuboid.getColumns().size(); ++i) { int useSplit = i + bodySplitOffset; TblColRef col = cuboid.getColumns().get(i); if (cubeDesc.getRowkey().isUseDictionary(col)) { // if dictionary on fact table column, needs rewrite DictionaryManager dictMgr = DictionaryManager.getInstance(kylinConfig); Dictionary<String> mergedDict = dictMgr.getDictionary(mergedSeg.getDictResPath(col)); // handle the dict of all merged segments is null if (mergedDict == null) { continue; }/*from w w w . j a v a 2s . c o m*/ Dictionary<String> sourceDict; // handle the column that all records is null if (mergingSeg.getDictionary(col) == null) { BytesUtil.writeUnsigned(mergedDict.nullId(), newKeyBodyBuf, bufOffset, mergedDict.getSizeOfId()); bufOffset += mergedDict.getSizeOfId(); continue; } else { sourceDict = dictMgr.getDictionary(mergingSeg.getDictResPath(col)); } while (sourceDict.getSizeOfValue() > newKeyBodyBuf.length - bufOffset || // mergedDict.getSizeOfValue() > newKeyBodyBuf.length - bufOffset || // mergedDict.getSizeOfId() > newKeyBodyBuf.length - bufOffset) { byte[] oldBuf = newKeyBodyBuf; newKeyBodyBuf = new byte[2 * newKeyBodyBuf.length]; System.arraycopy(oldBuf, 0, newKeyBodyBuf, 0, oldBuf.length); } int idInSourceDict = BytesUtil.readUnsigned(splittedByteses[useSplit].array(), splittedByteses[useSplit].offset(), splittedByteses[useSplit].length()); int idInMergedDict; //int size = sourceDict.getValueBytesFromId(idInSourceDict, newKeyBodyBuf, bufOffset); String v = sourceDict.getValueFromId(idInSourceDict); if (v == null) { idInMergedDict = mergedDict.nullId(); } else { 
idInMergedDict = mergedDict.getIdFromValue(v); } BytesUtil.writeUnsigned(idInMergedDict, newKeyBodyBuf, bufOffset, mergedDict.getSizeOfId()); bufOffset += mergedDict.getSizeOfId(); } else { // keep as it is while (splittedByteses[useSplit].length() > newKeyBodyBuf.length - bufOffset) { byte[] oldBuf = newKeyBodyBuf; newKeyBodyBuf = new byte[2 * newKeyBodyBuf.length]; System.arraycopy(oldBuf, 0, newKeyBodyBuf, 0, oldBuf.length); } System.arraycopy(splittedByteses[useSplit].array(), splittedByteses[useSplit].offset(), newKeyBodyBuf, bufOffset, splittedByteses[useSplit].length()); bufOffset += splittedByteses[useSplit].length(); } } int fullKeySize = rowkeyEncoder.getBytesLength(); while (newKeyBuf.array().length < fullKeySize) { newKeyBuf = new ByteArray(newKeyBuf.length() * 2); } newKeyBuf.setLength(fullKeySize); rowkeyEncoder.encode(new ByteArray(newKeyBodyBuf, 0, bufOffset), newKeyBuf); byte[] resultKey = new byte[fullKeySize]; System.arraycopy(newKeyBuf.array(), 0, resultKey, 0, fullKeySize); return new Text(resultKey); }
From source file:org.apache.kylin.engine.mr.steps.SelfDefineSortableKey.java
License:Apache License
/**
 * Initializes this sortable key from a raw key and its type id.
 *
 * <p>For number-family types the text after the first byte is parsed into a
 * {@code Long} (integer family) or a {@code Double} (otherwise) and stored as the
 * comparable object. For all other types the {@code Text} itself is stored.
 *
 * <p>The bytes are decoded explicitly as UTF-8, the encoding {@code Text} stores its
 * content in, instead of relying on the platform default charset.
 *
 * @param key    the raw key; its first byte is skipped when parsing numbers
 * @param typeId the type id that decides how the key is interpreted
 * @throws NumberFormatException if a number-family key does not hold a parsable number
 */
public void init(Text key, byte typeId) {
    this.typeId = typeId;
    this.rawKey = key;
    if (isNumberFamily()) {
        // Only [0, getLength()) of the backing array is valid; skip the first byte.
        String valueStr = new String(key.getBytes(), 1, key.getLength() - 1,
                java.nio.charset.StandardCharsets.UTF_8);
        // Kept as if/else: a conditional expression would promote the long to double.
        if (isIntegerFamily()) {
            this.keyInObj = Long.parseLong(valueStr);
        } else {
            this.keyInObj = Double.parseDouble(valueStr);
        }
    } else {
        this.keyInObj = key;
    }
}
From source file:org.apache.kylin.engine.mr.steps.UHCDictionaryMapper.java
License:Apache License
/**
 * Emits a sortable key made of one prefix byte followed by the bytes of {@code value}.
 *
 * <p>The prefix is the fourth byte of {@code Bytes.toBytes(index)}. The scratch
 * buffer is replaced by a larger one, sized by {@code countNewSize}, whenever the
 * required size reaches its capacity.
 *
 * @param key     unused
 * @param value   the text whose valid bytes are appended after the prefix byte
 * @param context the context the sortable key is written to
 * @throws IOException          declared by the mapper contract
 * @throws InterruptedException declared by the mapper contract
 */
@Override
public void doMap(NullWritable key, Text value, Context context) throws IOException, InterruptedException {
    tmpBuf.clear();

    int valueLength = value.getLength();
    int requiredSize = valueLength + 1;
    if (tmpBuf.capacity() <= requiredSize) {
        tmpBuf = ByteBuffer.allocate(countNewSize(tmpBuf.capacity(), requiredSize));
    }

    byte prefix = Bytes.toBytes(index)[3];
    tmpBuf.put(prefix);
    // Only the first getLength() bytes of the Text's backing array are valid.
    tmpBuf.put(value.getBytes(), 0, valueLength);

    outputKey.set(tmpBuf.array(), 0, tmpBuf.position());
    sortableKey.init(outputKey, type);
    context.write(sortableKey, NullWritable.get());
}