List of usage examples for org.apache.hadoop.io.Text getBytes()

Method under discussion:

@Override
public byte[] getBytes()

getBytes() returns the raw byte array backing the Text instance; only the first getLength() bytes are valid, which is why nearly every example below pairs getBytes() with getLength().
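Before the project examples, here is a minimal standalone sketch of why the length matters. It assumes only that hadoop-common is on the classpath; the class name TextGetBytesDemo and the object-reuse scenario are illustrative, not taken from any of the projects below.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextGetBytesDemo {
    public static void main(String[] args) {
        Text text = new Text("customer");
        text.set("id"); // reuse the same Text; the backing array keeps its old capacity

        byte[] raw = text.getBytes(); // backing array, not trimmed to the valid length
        int len = text.getLength();   // number of valid UTF-8 bytes

        // Correct: decode only the first getLength() bytes
        String ok = new String(raw, 0, len, StandardCharsets.UTF_8); // "id"

        // Incorrect: may also decode stale bytes left over from "customer"
        String wrong = new String(raw, StandardCharsets.UTF_8);

        System.out.println(ok);
        System.out.println(wrong.length() >= ok.length()); // the raw array can be longer than the valid data
    }
}

This reuse pattern matters in practice because MapReduce hands the same key and value objects to repeated map()/reduce() calls, so the snippets below consistently bound their reads with value.getLength().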
From source file: org.apache.hyracks.imru.dataflow.Hdtest.java
License: Apache License

public static JobSpecification createJob() throws Exception {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(4096);
    String PATH_TO_HADOOP_CONF = "/home/wangrui/a/imru/hadoop-0.20.2/conf";
    String HDFS_INPUT_PATH = "/customer/customer.tbl,/customer_result/part-0";
    JobConf conf = new JobConf();
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    conf.setInputFormat(TextInputFormat.class);
    RecordDescriptor recordDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1);
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(spec, recordDesc, conf, splits,
            new String[] { "NC0", "NC1" }, new IKeyValueParserFactory<LongWritable, Text>() {
                @Override
                public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
                    return new IKeyValueParser<LongWritable, Text>() {
                        TupleWriter tupleWriter;

                        @Override
                        public void open(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter = new TupleWriter(ctx, writer, 1);
                        }

                        @Override
                        public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
                                throws HyracksDataException {
                            try {
                                tupleWriter.write(value.getBytes(), 0, value.getLength());
                                tupleWriter.finishField();
                                tupleWriter.finishTuple();
                            } catch (IOException e) {
                                throw new HyracksDataException(e);
                            }
                        }

                        @Override
                        public void close(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter.close();
                        }
                    };
                }
            });
    // createPartitionConstraint(spec, readOperator, new String[] {"NC0"});
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, readOperator, new String[] { "NC0", "NC1" });
    IOperatorDescriptor writer = new HDFSOD(spec, null, null, null);
    // createPartitionConstraint(spec, writer, outSplits);
    spec.connect(new OneToOneConnectorDescriptor(spec), readOperator, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
From source file: org.apache.ignite.hadoop.io.TextPartiallyRawComparator.java
License: Apache License

/** {@inheritDoc} */
@Override
public int compare(Text val1, long val2Ptr, int val2Len) {
    int len2 = WritableUtils.decodeVIntSize(GridUnsafe.getByte(val2Ptr));

    return HadoopUtils.compareBytes(val1.getBytes(), val1.getLength(), val2Ptr + len2, val2Len - len2);
}
From source file: org.apache.kudu.mapreduce.tools.ImportCsvMapper.java
License: Apache License

/**
 * Convert a line of CSV text into a Kudu Insert
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();

    try {
        CsvParser.ParsedLine parsed = this.parser.parse(lineBytes, value.getLength());

        Insert insert = this.table.newInsert();
        PartialRow row = insert.getRow();
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            String colName = parsed.getColumnName(i);
            ColumnSchema col = this.schema.getColumn(colName);
            String colValue = Bytes.getString(parsed.getLineBytes(), parsed.getColumnOffset(i),
                    parsed.getColumnLength(i));
            switch (col.getType()) {
            case BOOL:
                row.addBoolean(colName, Boolean.parseBoolean(colValue));
                break;
            case INT8:
                row.addByte(colName, Byte.parseByte(colValue));
                break;
            case INT16:
                row.addShort(colName, Short.parseShort(colValue));
                break;
            case INT32:
                row.addInt(colName, Integer.parseInt(colValue));
                break;
            case INT64:
                row.addLong(colName, Long.parseLong(colValue));
                break;
            case STRING:
                row.addString(colName, colValue);
                break;
            case FLOAT:
                row.addFloat(colName, Float.parseFloat(colValue));
                break;
            case DOUBLE:
                row.addDouble(colName, Double.parseDouble(colValue));
                break;
            default:
                throw new IllegalArgumentException("Type " + col.getType() + " not recognized");
            }
        }
        context.write(NULL_KEY, insert);
    } catch (CsvParser.BadCsvLineException badLine) {
        if (this.skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            this.badLineCount.increment(1);
            return;
        } else {
            throw new IOException("Failing task because of a bad line", badLine);
        }
    } catch (IllegalArgumentException e) {
        if (this.skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + e.getMessage());
            this.badLineCount.increment(1);
            return;
        } else {
            throw new IOException("Failing task because of an illegal argument", e);
        }
    } catch (InterruptedException e) {
        throw new IOException("Failing task since it was interrupted", e);
    }
}
From source file: org.apache.kylin.engine.mr.steps.CalculateStatsFromBaseCuboidMapper.java
License: Apache License

@Override
public void doMap(Text key, Text value, Context context) throws InterruptedException, IOException {
    long cuboidID = rowKeyDecoder.decode(key.getBytes());
    if (cuboidID != baseCuboidId) {
        return; // Skip data from cuboids which are not the base cuboid
    }

    List<String> keyValues = rowKeyDecoder.getValues();

    if (rowCount < samplingPercentage) {
        Preconditions.checkArgument(nRowKey == keyValues.size());

        String[] row = keyValues.toArray(new String[keyValues.size()]);

        if (isUsePutRowKeyToHllNewAlgorithm) {
            putRowKeyToHLLNew(row);
        } else {
            putRowKeyToHLLOld(row);
        }
    }

    if (++rowCount == 100)
        rowCount = 0;
}
From source file: org.apache.kylin.engine.mr.steps.CalculateStatsFromBaseCuboidPartitioner.java
License: Apache License

@Override
public int getPartition(Text key, Text value, int numReduceTasks) {
    Long cuboidId = Bytes.toLong(key.getBytes());
    int shard = cuboidId.hashCode() % hllShardBase;
    if (shard < 0) {
        shard += hllShardBase;
    }
    return numReduceTasks - shard - 1;
}
From source file: org.apache.kylin.engine.mr.steps.CalculateStatsFromBaseCuboidReducer.java
License: Apache License

@Override
public void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    long cuboidId = Bytes.toLong(key.getBytes());
    logger.info("Cuboid id to be processed: " + cuboidId);
    for (Text value : values) {
        HLLCounter hll = new HLLCounter(cubeConfig.getCubeStatsHLLPrecision());
        ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
        hll.readRegisters(bf);

        if (cuboidId == baseCuboidId) {
            baseCuboidRowCountInMappers.add(hll.getCountEstimate());
        }

        totalRowsBeforeMerge += hll.getCountEstimate();

        if (cuboidHLLMap.get(cuboidId) != null) {
            cuboidHLLMap.get(cuboidId).merge(hll);
        } else {
            cuboidHLLMap.put(cuboidId, hll);
        }
    }
}
From source file: org.apache.kylin.engine.mr.steps.CuboidReducer.java
License: Apache License

@Override
public void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    aggs.reset();

    for (Text value : values) {
        if (vcounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) {
            logger.info("Handling value with ordinal (This is not KV number!): " + vcounter);
        }
        codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), input);
        aggs.aggregate(input, needAggrMeasures);
    }
    aggs.collectStates(result);

    ByteBuffer valueBuf = codec.encode(result);

    outputValue.set(valueBuf.array(), 0, valueBuf.position());
    context.write(key, outputValue);
}
From source file: org.apache.kylin.engine.mr.steps.FactDistinctColumnPartitioner.java
License: Apache License

@Override
public int getPartition(SelfDefineSortableKey skey, Text value, int numReduceTasks) {
    Text key = skey.getText();
    if (key.getBytes()[0] == FactDistinctColumnsMapper.MARK_FOR_HLL) {
        // the last reducer is for merging hll
        return numReduceTasks - 1;
    } else if (key.getBytes()[0] == FactDistinctColumnsMapper.MARK_FOR_PARTITION_COL) {
        // the last but one reducer is for partition col
        return numReduceTasks - 2;
    } else {
        return BytesUtil.readUnsigned(key.getBytes(), 0, 1);
    }
}
From source file: org.apache.kylin.engine.mr.steps.FactDistinctColumnsReducer.java
License: Apache License

@Override
public void doReduce(SelfDefineSortableKey skey, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    Text key = skey.getText();

    if (isStatistics) {
        // for hll
        long cuboidId = Bytes.toLong(key.getBytes(), 1, Bytes.SIZEOF_LONG);
        for (Text value : values) {
            HLLCounter hll = new HLLCounter(cubeConfig.getCubeStatsHLLPrecision());
            ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
            hll.readRegisters(bf);

            totalRowsBeforeMerge += hll.getCountEstimate();

            if (cuboidId == baseCuboidId) {
                baseCuboidRowCountInMappers.add(hll.getCountEstimate());
            }

            if (cuboidHLLMap.get(cuboidId) != null) {
                cuboidHLLMap.get(cuboidId).merge(hll);
            } else {
                cuboidHLLMap.put(cuboidId, hll);
            }
        }
    } else if (isPartitionCol) {
        // partition col
        String value = Bytes.toString(key.getBytes(), 1, key.getLength() - 1);
        logAFewRows(value);
        long time = DateFormat.stringToMillis(value);
        timeMinValue = Math.min(timeMinValue, time);
        timeMaxValue = Math.max(timeMaxValue, time);
    } else {
        // normal col
        if (buildDictInReducer) {
            String value = Bytes.toString(key.getBytes(), 1, key.getLength() - 1);
            logAFewRows(value);
            builder.addValue(value);
        } else {
            byte[] keyBytes = Bytes.copy(key.getBytes(), 1, key.getLength() - 1);
            // output written to baseDir/colName/-r-00000 (etc)
            String fileName = col.getIdentity() + "/";
            mos.write(BatchConstants.CFG_OUTPUT_COLUMN, NullWritable.get(), new Text(keyBytes), fileName);
        }
    }

    rowCount++;
}
From source file: org.apache.kylin.engine.mr.steps.FilterRecommendCuboidDataMapper.java
License: Apache License

@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidID = rowKeySplitter.split(key.getBytes());
    if (cuboidID != baseCuboid && !recommendCuboids.contains(cuboidID)) {
        return;
    }

    String baseOutputPath = PathNameCuboidOld;
    if (cuboidID == baseCuboid) {
        baseOutputPath = PathNameCuboidBase;
    }
    mos.write(key, value, generateFileName(baseOutputPath));
}