List of usage examples for org.apache.hadoop.io.Text#getBytes()
@Override public byte[] getBytes()
From source file:org.apache.kylin.engine.mr.steps.UHCDictionaryReducer.java
License:Apache License
@Override public void doReduce(SelfDefineSortableKey skey, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { Text key = skey.getText(); String value = Bytes.toString(key.getBytes(), 1, key.getLength() - 1); builder.addValue(value);//ww w .j a va 2 s. c o m }
From source file:org.apache.kylin.engine.mr.steps.UpdateOldCuboidShardMapper.java
License:Apache License
/**
 * Splits the incoming row key, rebuilds it via {@code buildKey}, and writes the
 * record to a named output chosen by cuboid id.
 *
 * <p>Records whose cuboid id equals {@code baseCuboid} go to the
 * {@code PathNameCuboidBase} output; all others go to {@code PathNameCuboidOld}.
 *
 * @param key     original row key
 * @param value   record value, written through unchanged
 * @param context not used directly; output goes through {@code mos}
 */
@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    final long cuboidID = rowKeySplitter.split(key.getBytes());
    final Cuboid targetCuboid = Cuboid.findForMandatory(cubeDesc, cuboidID);

    // buildKey reports how many bytes of newKeyBuf make up the rebuilt key.
    final int rebuiltKeyLength = buildKey(targetCuboid, rowKeySplitter.getSplitBuffers());
    outputKey.set(newKeyBuf.array(), 0, rebuiltKeyLength);

    final String outputPath = (cuboidID == baseCuboid) ? PathNameCuboidBase : PathNameCuboidOld;
    mos.write(outputKey, value, generateFileName(outputPath));
}
From source file:org.apache.kylin.engine.spark.SparkUtil.java
License:Apache License
/**
 * Builds an RDD of string-array records from either a sequence file or a Hive table.
 *
 * <p>When {@code isSequenceFile} is true, each {@link Text} value read from
 * {@code inputPath} is decoded (valid bytes only, i.e. {@code 0..getLength()}) and split
 * on {@code BatchConstants.SEQUENCE_FILE_DEFAULT_DELIMITER}. Otherwise the table
 * {@code hiveTable} is loaded through a Hive-enabled {@link SparkSession} and every
 * column of every row is converted with {@code toString()}; null columns stay null.
 *
 * @param isSequenceFile whether to read from the sequence file at {@code inputPath}
 * @param sc             Spark context used for reading and for session configuration
 * @param inputPath      sequence file location; only used when {@code isSequenceFile} is true
 * @param hiveTable      Hive table name; only used when {@code isSequenceFile} is false
 * @return one {@code String[]} per input record
 */
public static JavaRDD<String[]> hiveRecordInputRDD(boolean isSequenceFile, JavaSparkContext sc, String inputPath,
        String hiveTable) {
    if (isSequenceFile) {
        return sc.sequenceFile(inputPath, BytesWritable.class, Text.class).values()
                .map(new Function<Text, String[]>() {
                    @Override
                    public String[] call(Text text) throws Exception {
                        // getBytes() is the backing array; bound the decode by getLength().
                        String line = Bytes.toString(text.getBytes(), 0, text.getLength());
                        return line.split(BatchConstants.SEQUENCE_FILE_DEFAULT_DELIMITER);
                    }
                });
    }

    SparkSession sparkSession = SparkSession.builder().config(sc.getConf()).enableHiveSupport().getOrCreate();
    // Typed as Dataset<Row> (what SparkSession.table returns) instead of the raw type,
    // so the map below is checked by the compiler rather than an unchecked conversion.
    final Dataset<Row> intermediateTable = sparkSession.table(hiveTable);
    return intermediateTable.javaRDD().map(new Function<Row, String[]>() {
        @Override
        public String[] call(Row row) throws Exception {
            String[] result = new String[row.size()];
            for (int i = 0; i < row.size(); i++) {
                final Object column = row.get(i);
                result[i] = (column != null) ? column.toString() : null;
            }
            return result;
        }
    });
}
From source file:org.apache.kylin.job.hadoop.cube.BaseCuboidMapperTest.java
License:Apache License
@Test public void testMapperWithHeader() throws Exception { String cubeName = "test_kylin_cube_with_slr_1_new_segment"; String segmentName = "20130331080000_20131212080000"; mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName); mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName); // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL, // metadata); mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.33")); List<Pair<Text, Text>> result = mapDriver.run(); CubeManager cubeMgr = CubeManager.getInstance(getTestConfig()); CubeInstance cube = cubeMgr.getCube(cubeName); assertEquals(1, result.size());/*w w w. ja v a 2s . co m*/ Text rowkey = result.get(0).getFirst(); byte[] key = rowkey.getBytes(); byte[] header = Bytes.head(key, 26); byte[] sellerId = Bytes.tail(header, 18); byte[] cuboidId = Bytes.head(header, 8); byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26); RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment()); decoder.decode(key); assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]", decoder.getValues().toString()); assertTrue(Bytes.toString(sellerId).startsWith("123456789")); assertEquals(511, Bytes.toLong(cuboidId)); assertEquals(22, restKey.length); verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "132.33", "132.33", "132.33"); }
From source file:org.apache.kylin.job.hadoop.cube.BaseCuboidMapperTest.java
License:Apache License
@Test public void testMapperWithNull() throws Exception { String cubeName = "test_kylin_cube_with_slr_1_new_segment"; String segmentName = "20130331080000_20131212080000"; mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName); mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName); // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL, // metadata); mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N")); List<Pair<Text, Text>> result = mapDriver.run(); CubeManager cubeMgr = CubeManager.getInstance(getTestConfig()); CubeInstance cube = cubeMgr.getCube(cubeName); assertEquals(1, result.size());/*from w ww . j av a2 s . c om*/ Text rowkey = result.get(0).getFirst(); byte[] key = rowkey.getBytes(); byte[] header = Bytes.head(key, 26); byte[] sellerId = Bytes.tail(header, 18); byte[] cuboidId = Bytes.head(header, 8); byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26); RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment()); decoder.decode(key); assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]", decoder.getValues().toString()); assertTrue(Bytes.toString(sellerId).startsWith("123456789")); assertEquals(511, Bytes.toLong(cuboidId)); assertEquals(22, restKey.length); verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0"); }
From source file:org.apache.kylin.job.hadoop.cube.CubeHFileMapper2Test.java
License:Apache License
/**
 * Runs {@code CubeHFileMapper} on a single key/value pair and checks that the key and
 * value captured in {@code outKV} carry the same bytes as the inputs.
 *
 * <p>NOTE(review): {@code outKV} is presumably filled by the mock context when the
 * mapper writes its output - confirm in {@code MockupMapContext}.
 */
@Test
public void testBasic() throws Exception {
    final Configuration hadoopConf = new Configuration();
    final Context mockContext = MockupMapContext.create(hadoopConf, getTestConfig().getMetadataUrl(), cubeName,
            outKV);

    final CubeHFileMapper mapper = new CubeHFileMapper();
    mapper.setup(mockContext);

    final Text key = new Text("not important");
    final Text value = new Text(new byte[] { 2, 2, 51, -79, 1 });
    mapper.map(key, value, mockContext);

    final ImmutableBytesWritable writtenKey = (ImmutableBytesWritable) outKV[0];
    final KeyValue writtenValue = (KeyValue) outKV[1];

    // Compare only the valid region of each Text (0..getLength()).
    final int keyDiff = Bytes.compareTo(key.getBytes(), 0, key.getLength(), writtenKey.get(),
            writtenKey.getOffset(), writtenKey.getLength());
    assertTrue(keyDiff == 0);

    final int valueDiff = Bytes.compareTo(value.getBytes(), 0, value.getLength(), writtenValue.getValueArray(),
            writtenValue.getValueOffset(), writtenValue.getValueLength());
    assertTrue(valueDiff == 0);
}
From source file:org.apache.kylin.job.hadoop.cube.MergeCuboidMapper.java
License:Apache License
@Override public void map(Text key, Text value, Context context) throws IOException, InterruptedException { long cuboidID = rowKeySplitter.split(key.getBytes(), key.getBytes().length); Cuboid cuboid = Cuboid.findById(cubeDesc, cuboidID); SplittedBytes[] splittedByteses = rowKeySplitter.getSplitBuffers(); int bufOffset = 0; BytesUtil.writeLong(cuboidID, newKeyBuf, bufOffset, RowConstants.ROWKEY_CUBOIDID_LEN); bufOffset += RowConstants.ROWKEY_CUBOIDID_LEN; for (int i = 0; i < cuboid.getColumns().size(); ++i) { TblColRef col = cuboid.getColumns().get(i); if (this.checkNeedMerging(col)) { // if dictionary on fact table column, needs rewrite DictionaryManager dictMgr = DictionaryManager.getInstance(config); Dictionary<?> sourceDict = dictMgr.getDictionary(sourceCubeSegment.getDictResPath(col)); Dictionary<?> mergedDict = dictMgr.getDictionary(mergedCubeSegment.getDictResPath(col)); while (sourceDict.getSizeOfValue() > newKeyBuf.length - bufOffset || mergedDict.getSizeOfValue() > newKeyBuf.length - bufOffset) { byte[] oldBuf = newKeyBuf; newKeyBuf = new byte[2 * newKeyBuf.length]; System.arraycopy(oldBuf, 0, newKeyBuf, 0, oldBuf.length); }//from w ww . j a v a2s . 
c om int idInSourceDict = BytesUtil.readUnsigned(splittedByteses[i + 1].value, 0, splittedByteses[i + 1].length); int idInMergedDict; int size = sourceDict.getValueBytesFromId(idInSourceDict, newKeyBuf, bufOffset); if (size < 0) { idInMergedDict = mergedDict.nullId(); } else { idInMergedDict = mergedDict.getIdFromValueBytes(newKeyBuf, bufOffset, size); } BytesUtil.writeUnsigned(idInMergedDict, newKeyBuf, bufOffset, mergedDict.getSizeOfId()); bufOffset += mergedDict.getSizeOfId(); } else { // keep as it is while (splittedByteses[i + 1].length > newKeyBuf.length - bufOffset) { byte[] oldBuf = newKeyBuf; newKeyBuf = new byte[2 * newKeyBuf.length]; System.arraycopy(oldBuf, 0, newKeyBuf, 0, oldBuf.length); } System.arraycopy(splittedByteses[i + 1].value, 0, newKeyBuf, bufOffset, splittedByteses[i + 1].length); bufOffset += splittedByteses[i + 1].length; } } byte[] newKey = Arrays.copyOf(newKeyBuf, bufOffset); outputKey.set(newKey, 0, newKey.length); context.write(outputKey, value); }
From source file:org.apache.kylin.storage.hbase.steps.CubeHFileMapper.java
License:Apache License
@Override public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException { outputKey.set(key.getBytes(), 0, key.getLength()); KeyValue outputValue;/*from w ww . j a va2 s. c o m*/ int n = keyValueCreators.size(); if (n == 1 && keyValueCreators.get(0).isFullCopy) { // shortcut for simple full copy outputValue = keyValueCreators.get(0).create(key, value.getBytes(), 0, value.getLength()); context.write(outputKey, outputValue); } else { // normal (complex) case that distributes measures to multiple HBase columns inputCodec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), inputMeasures); for (int i = 0; i < n; i++) { outputValue = keyValueCreators.get(i).create(key, inputMeasures); context.write(outputKey, outputValue); } } }
From source file:org.apache.kylin.storage.hbase.steps.CubeHFileMapper2Test.java
License:Apache License
@Test public void testBasic() throws Exception { Configuration hconf = HadoopUtil.getCurrentConfiguration(); Context context = MockupMapContext.create(hconf, cubeName, outKV); CubeHFileMapper mapper = new CubeHFileMapper(); mapper.setup(context);//from w ww . java 2s . com Text key = new Text("not important"); Text value = new Text(new byte[] { 2, 2, 51, -79, 1 }); mapper.map(key, value, context); ImmutableBytesWritable outKey = (ImmutableBytesWritable) outKV[0]; KeyValue outValue = (KeyValue) outKV[1]; assertTrue(Bytes.compareTo(key.getBytes(), 0, key.getLength(), outKey.get(), outKey.getOffset(), outKey.getLength()) == 0); assertTrue(Bytes.compareTo(value.getBytes(), 0, value.getLength(), outValue.getValueArray(), outValue.getValueOffset(), outValue.getValueLength()) == 0); }
From source file:org.apache.kylin.storage.hbase.steps.KeyValueCreator.java
License:Apache License
/**
 * Convenience overload that unpacks a {@link Text} key and delegates to the
 * byte-array based {@code create}.
 *
 * @param key           row key; bytes {@code 0..getLength()} of its backing array are used
 * @param measureValues measure values forwarded unchanged to the delegate
 * @return whatever the delegated {@code create} call returns
 */
public KeyValue create(Text key, Object[] measureValues) {
    final byte[] keyBytes = key.getBytes();
    final int keyLength = key.getLength();
    return create(keyBytes, 0, keyLength, measureValues);
}