Example usage for org.apache.hadoop.io Text getBytes

Introduction

This page collects example usages of org.apache.hadoop.io.Text.getBytes(), drawn from the Apache Kylin code base.

Prototype

@Override
public byte[] getBytes() 

Document

Returns the raw bytes; however, only data up to getLength() is valid.
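
This caveat matters in practice: Text reuses its backing array, so getBytes() can return an array longer than the current value, with stale bytes past getLength(). A minimal, self-contained sketch of the contract (illustration only, not Kylin code):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextBytesContract {
    public static void main(String[] args) {
        Text text = new Text("Hello, Kylin");                   // 12 valid bytes
        byte[] shorter = "Hi".getBytes(StandardCharsets.UTF_8);
        text.set(shorter, 0, shorter.length);                   // reuses the old, larger buffer

        byte[] raw = text.getBytes();                           // backing array; may carry a stale tail
        int len = text.getLength();                             // only this many bytes are valid

        String wrong = new String(raw, StandardCharsets.UTF_8);          // may decode stale bytes
        String right = new String(raw, 0, len, StandardCharsets.UTF_8);  // "Hi"
        System.out.println("[" + wrong + "] vs [" + right + "]");
    }
}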

Usage

From source file:org.apache.kylin.engine.mr.steps.UHCDictionaryReducer.java

License:Apache License

@Override
public void doReduce(SelfDefineSortableKey skey, Iterable<NullWritable> values, Context context)
        throws IOException, InterruptedException {
    Text key = skey.getText();
    String value = Bytes.toString(key.getBytes(), 1, key.getLength() - 1);
    builder.addValue(value);
}
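
Here the decode starts at offset 1 and reads getLength() - 1 bytes, deliberately skipping the first byte of the key. A hedged sketch of that layout as a helper (the one-byte-marker interpretation is an assumption inferred from the offsets above, not Kylin's documented format):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

final class MarkedTextDecoder {
    // Assumed layout: [1 marker byte][UTF-8 payload], inferred from the reducer above.
    static String payload(Text key) {
        if (key.getLength() < 1) {
            throw new IllegalArgumentException("key is empty, no marker byte");
        }
        return new String(key.getBytes(), 1, key.getLength() - 1, StandardCharsets.UTF_8);
    }
}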

From source file:org.apache.kylin.engine.mr.steps.UpdateOldCuboidShardMapper.java

License:Apache License

@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidID = rowKeySplitter.split(key.getBytes());

    Cuboid cuboid = Cuboid.findForMandatory(cubeDesc, cuboidID);
    int fullKeySize = buildKey(cuboid, rowKeySplitter.getSplitBuffers());
    outputKey.set(newKeyBuf.array(), 0, fullKeySize);

    String baseOutputPath = PathNameCuboidOld;
    if (cuboidID == baseCuboid) {
        baseOutputPath = PathNameCuboidBase;
    }
    mos.write(outputKey, value, generateFileName(baseOutputPath));
}
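
Note that split() receives only the raw backing array; that is safe only when the callee derives the row key's extent from its own contents. When an API needs an array of exactly the valid bytes, copy them out first, either with Text#copyBytes() (available in newer Hadoop versions) or with a plain bounded copy, as in this sketch:

import java.util.Arrays;

import org.apache.hadoop.io.Text;

final class TextTrim {
    // Returns exactly the valid bytes. Allocates a copy, so prefer an
    // offset/length overload on hot paths where one is available.
    static byte[] exactBytes(Text t) {
        return Arrays.copyOf(t.getBytes(), t.getLength());
    }
}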

From source file:org.apache.kylin.engine.spark.SparkUtil.java

License:Apache License

public static JavaRDD<String[]> hiveRecordInputRDD(boolean isSequenceFile, JavaSparkContext sc,
        String inputPath, String hiveTable) {
    JavaRDD<String[]> recordRDD;

    if (isSequenceFile) {
        recordRDD = sc.sequenceFile(inputPath, BytesWritable.class, Text.class).values()
                .map(new Function<Text, String[]>() {
                    @Override
                    public String[] call(Text text) throws Exception {
                        String s = Bytes.toString(text.getBytes(), 0, text.getLength());
                        return s.split(BatchConstants.SEQUENCE_FILE_DEFAULT_DELIMITER);
                    }
                });
    } else {
        SparkSession sparkSession = SparkSession.builder().config(sc.getConf()).enableHiveSupport()
                .getOrCreate();
        final Dataset<Row> intermediateTable = sparkSession.table(hiveTable);
        recordRDD = intermediateTable.javaRDD().map(new Function<Row, String[]>() {
            @Override
            public String[] call(Row row) throws Exception {
                String[] result = new String[row.size()];
                for (int i = 0; i < row.size(); i++) {
                    final Object o = row.get(i);
                    if (o != null) {
                        result[i] = o.toString();
                    } else {
                        result[i] = null;
                    }
                }
                return result;
            }
        });
    }

    return recordRDD;
}
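
The map function converts each Text to a String immediately, which is essential here: Hadoop record readers reuse a single Text instance across records, so holding on to the Writable (or caching an RDD of Texts) would leave every element pointing at the same mutated buffer. A hedged sketch of the same pattern in isolation:

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

final class SequenceFileLines {
    // Copies each reused Text into an immutable String before anything downstream sees it.
    static JavaRDD<String> lines(JavaSparkContext sc, String inputPath) {
        return sc.sequenceFile(inputPath, BytesWritable.class, Text.class).values()
                .map(t -> new String(t.getBytes(), 0, t.getLength(), StandardCharsets.UTF_8));
    }
}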

From source file:org.apache.kylin.job.hadoop.cube.BaseCuboidMapperTest.java

License:Apache License

@Test
public void testMapperWithHeader() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);
    mapDriver.withInput(new Text("key"),
            new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.33"));
    List<Pair<Text, Text>> result = mapDriver.run();

    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    byte[] key = rowkey.getBytes();
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);

    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]",
            decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "132.33", "132.33", "132.33");
}
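
The slicing implies a fixed row-key layout: an 8-byte cuboid id followed by an 18-byte seller id (the 26-byte header), then the remaining dimension values up to rowkey.getLength(). The same arithmetic with plain array copies, as a sketch (the layout is read off the test above, not a general Kylin contract):

import java.util.Arrays;

final class RowKeyLayout {
    static final int CUBOID_ID_LEN = 8;
    static final int SELLER_ID_LEN = 18;
    static final int HEADER_LEN = CUBOID_ID_LEN + SELLER_ID_LEN; // 26

    static byte[] cuboidId(byte[] key) { return Arrays.copyOfRange(key, 0, CUBOID_ID_LEN); }
    static byte[] sellerId(byte[] key) { return Arrays.copyOfRange(key, CUBOID_ID_LEN, HEADER_LEN); }
    static byte[] rest(byte[] key, int validLen) { return Arrays.copyOfRange(key, HEADER_LEN, validLen); }
}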

From source file:org.apache.kylin.job.hadoop.cube.BaseCuboidMapperTest.java

License:Apache License

@Test
public void testMapperWithNull() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);
    mapDriver.withInput(new Text("key"),
            new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
    List<Pair<Text, Text>> result = mapDriver.run();

    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    byte[] key = rowkey.getBytes();
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);

    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]",
            decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0");
}

From source file:org.apache.kylin.job.hadoop.cube.CubeHFileMapper2Test.java

License:Apache License

@Test
public void testBasic() throws Exception {

    Configuration hconf = new Configuration();
    Context context = MockupMapContext.create(hconf, getTestConfig().getMetadataUrl(), cubeName, outKV);

    CubeHFileMapper mapper = new CubeHFileMapper();
    mapper.setup(context);

    Text key = new Text("not important");
    Text value = new Text(new byte[] { 2, 2, 51, -79, 1 });

    mapper.map(key, value, context);

    ImmutableBytesWritable outKey = (ImmutableBytesWritable) outKV[0];
    KeyValue outValue = (KeyValue) outKV[1];

    assertTrue(Bytes.compareTo(key.getBytes(), 0, key.getLength(), outKey.get(), outKey.getOffset(),
            outKey.getLength()) == 0);

    assertTrue(Bytes.compareTo(value.getBytes(), 0, value.getLength(), outValue.getValueArray(),
            outValue.getValueOffset(), outValue.getValueLength()) == 0);
}
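
Bytes.compareTo with explicit offsets and lengths compares the valid region of a Text against another byte range without any copying; bounding both sides is what keeps stale buffer bytes out of the comparison. A minimal helper, as a sketch:

import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

final class TextCompare {
    // True when the valid bytes of t equal other[otherOff, otherOff + otherLen).
    static boolean sameBytes(Text t, byte[] other, int otherOff, int otherLen) {
        return Bytes.compareTo(t.getBytes(), 0, t.getLength(), other, otherOff, otherLen) == 0;
    }
}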

From source file:org.apache.kylin.job.hadoop.cube.MergeCuboidMapper.java

License:Apache License

@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    // Bound the parse by getLength(); the backing array can be longer than the value.
    long cuboidID = rowKeySplitter.split(key.getBytes(), key.getLength());
    Cuboid cuboid = Cuboid.findById(cubeDesc, cuboidID);

    SplittedBytes[] splittedByteses = rowKeySplitter.getSplitBuffers();
    int bufOffset = 0;
    BytesUtil.writeLong(cuboidID, newKeyBuf, bufOffset, RowConstants.ROWKEY_CUBOIDID_LEN);
    bufOffset += RowConstants.ROWKEY_CUBOIDID_LEN;

    for (int i = 0; i < cuboid.getColumns().size(); ++i) {
        TblColRef col = cuboid.getColumns().get(i);

        if (this.checkNeedMerging(col)) {
            // if dictionary on fact table column, needs rewrite
            DictionaryManager dictMgr = DictionaryManager.getInstance(config);
            Dictionary<?> sourceDict = dictMgr.getDictionary(sourceCubeSegment.getDictResPath(col));
            Dictionary<?> mergedDict = dictMgr.getDictionary(mergedCubeSegment.getDictResPath(col));

            while (sourceDict.getSizeOfValue() > newKeyBuf.length - bufOffset
                    || mergedDict.getSizeOfValue() > newKeyBuf.length - bufOffset) {
                byte[] oldBuf = newKeyBuf;
                newKeyBuf = new byte[2 * newKeyBuf.length];
                System.arraycopy(oldBuf, 0, newKeyBuf, 0, oldBuf.length);
            }

            int idInSourceDict = BytesUtil.readUnsigned(splittedByteses[i + 1].value, 0,
                    splittedByteses[i + 1].length);
            int idInMergedDict;

            int size = sourceDict.getValueBytesFromId(idInSourceDict, newKeyBuf, bufOffset);
            if (size < 0) {
                idInMergedDict = mergedDict.nullId();
            } else {
                idInMergedDict = mergedDict.getIdFromValueBytes(newKeyBuf, bufOffset, size);
            }

            BytesUtil.writeUnsigned(idInMergedDict, newKeyBuf, bufOffset, mergedDict.getSizeOfId());
            bufOffset += mergedDict.getSizeOfId();
        } else {
            // keep as it is
            while (splittedByteses[i + 1].length > newKeyBuf.length - bufOffset) {
                byte[] oldBuf = newKeyBuf;
                newKeyBuf = new byte[2 * newKeyBuf.length];
                System.arraycopy(oldBuf, 0, newKeyBuf, 0, oldBuf.length);
            }

            System.arraycopy(splittedByteses[i + 1].value, 0, newKeyBuf, bufOffset,
                    splittedByteses[i + 1].length);
            bufOffset += splittedByteses[i + 1].length;
        }
    }
    byte[] newKey = Arrays.copyOf(newKeyBuf, bufOffset);
    outputKey.set(newKey, 0, newKey.length);

    context.write(outputKey, value);
}
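
The two while-loops grow newKeyBuf by doubling until the next write fits. Factored out, the strategy looks like this (ensureCapacity is a hypothetical helper, not Kylin code):

final class Buffers {
    // Doubles buf until at least `needed` more bytes fit after `offset`,
    // preserving the bytes already written.
    static byte[] ensureCapacity(byte[] buf, int offset, int needed) {
        while (needed > buf.length - offset) {
            byte[] grown = new byte[2 * buf.length];
            System.arraycopy(buf, 0, grown, 0, buf.length);
            buf = grown;
        }
        return buf;
    }
}

With it, each branch of the mapper would reduce to newKeyBuf = Buffers.ensureCapacity(newKeyBuf, bufOffset, size) followed by the write.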

From source file:org.apache.kylin.storage.hbase.steps.CubeHFileMapper.java

License:Apache License

@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    outputKey.set(key.getBytes(), 0, key.getLength());
    KeyValue outputValue;

    int n = keyValueCreators.size();
    if (n == 1 && keyValueCreators.get(0).isFullCopy) { // shortcut for simple full copy

        outputValue = keyValueCreators.get(0).create(key, value.getBytes(), 0, value.getLength());
        context.write(outputKey, outputValue);

    } else { // normal (complex) case that distributes measures to multiple HBase columns

        inputCodec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), inputMeasures);

        for (int i = 0; i < n; i++) {
            outputValue = keyValueCreators.get(i).create(key, inputMeasures);
            context.write(outputKey, outputValue);
        }
    }
}
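
ByteBuffer.wrap(array, offset, length) is the usual bridge from a Text to APIs that consume ByteBuffers: it creates a view over the valid region without copying, with the limit capped at getLength(). A minimal sketch:

import java.nio.ByteBuffer;

import org.apache.hadoop.io.Text;

final class TextAsBuffer {
    // A zero-copy view over the valid bytes of the Text; later mutations
    // of the Text may show through, so consume the buffer promptly.
    static ByteBuffer view(Text t) {
        return ByteBuffer.wrap(t.getBytes(), 0, t.getLength());
    }
}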

From source file:org.apache.kylin.storage.hbase.steps.CubeHFileMapper2Test.java

License:Apache License

@Test
public void testBasic() throws Exception {

    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    Context context = MockupMapContext.create(hconf, cubeName, outKV);

    CubeHFileMapper mapper = new CubeHFileMapper();
    mapper.setup(context);

    Text key = new Text("not important");
    Text value = new Text(new byte[] { 2, 2, 51, -79, 1 });

    mapper.map(key, value, context);

    ImmutableBytesWritable outKey = (ImmutableBytesWritable) outKV[0];
    KeyValue outValue = (KeyValue) outKV[1];

    assertTrue(Bytes.compareTo(key.getBytes(), 0, key.getLength(), outKey.get(), outKey.getOffset(),
            outKey.getLength()) == 0);

    assertTrue(Bytes.compareTo(value.getBytes(), 0, value.getLength(), outValue.getValueArray(),
            outValue.getValueOffset(), outValue.getValueLength()) == 0);
}

From source file:org.apache.kylin.storage.hbase.steps.KeyValueCreator.java

License:Apache License

public KeyValue create(Text key, Object[] measureValues) {
    return create(key.getBytes(), 0, key.getLength(), measureValues);
}