List of usage examples for org.apache.hadoop.io.Text getBytes()

Method under discussion:

@Override
public byte[] getBytes()

getBytes() returns the raw byte array backing the Text instance; only the first getLength() bytes are valid, which is why nearly every example below pairs getBytes() with getLength().
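Before the project examples, here is a minimal standalone sketch of why the length matters. It assumes only that hadoop-common is on the classpath; the class name TextGetBytesDemo and the object-reuse scenario are illustrative, not taken from any of the projects below.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextGetBytesDemo {
    public static void main(String[] args) {
        Text text = new Text("customer");
        text.set("id"); // reuse the same Text; the backing array keeps its old capacity

        byte[] raw = text.getBytes(); // backing array, not trimmed to the valid length
        int len = text.getLength();   // number of valid UTF-8 bytes

        // Correct: decode only the first getLength() bytes
        String ok = new String(raw, 0, len, StandardCharsets.UTF_8); // "id"

        // Incorrect: may also decode stale bytes left over from "customer"
        String wrong = new String(raw, StandardCharsets.UTF_8);

        System.out.println(ok);
        System.out.println(wrong.length() >= ok.length()); // the raw array can be longer than the valid data
    }
}

This reuse pattern matters in practice because MapReduce hands the same key and value objects to repeated map()/reduce() calls, so the snippets below consistently bound their reads with value.getLength().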
From source file: org.apache.hyracks.imru.dataflow.Hdtest.java
License: Apache License

public static JobSpecification createJob() throws Exception {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(4096);
    String PATH_TO_HADOOP_CONF = "/home/wangrui/a/imru/hadoop-0.20.2/conf";
    String HDFS_INPUT_PATH = "/customer/customer.tbl,/customer_result/part-0";
    JobConf conf = new JobConf();
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    conf.setInputFormat(TextInputFormat.class);
    RecordDescriptor recordDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1);
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(spec, recordDesc, conf, splits,
            new String[] { "NC0", "NC1" }, new IKeyValueParserFactory<LongWritable, Text>() {
                @Override
                public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
                    return new IKeyValueParser<LongWritable, Text>() {
                        TupleWriter tupleWriter;

                        @Override
                        public void open(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter = new TupleWriter(ctx, writer, 1);
                        }

                        @Override
                        public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
                                throws HyracksDataException {
                            try {
                                tupleWriter.write(value.getBytes(), 0, value.getLength());
                                tupleWriter.finishField();
                                tupleWriter.finishTuple();
                            } catch (IOException e) {
                                throw new HyracksDataException(e);
                            }
                        }

                        @Override
                        public void close(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter.close();
                        }
                    };
                }
            });
    // createPartitionConstraint(spec, readOperator, new String[] {"NC0"});
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, readOperator, new String[] { "NC0", "NC1" });
    IOperatorDescriptor writer = new HDFSOD(spec, null, null, null);
    // createPartitionConstraint(spec, writer, outSplits);
    spec.connect(new OneToOneConnectorDescriptor(spec), readOperator, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
From source file: org.apache.ignite.hadoop.io.TextPartiallyRawComparator.java
License: Apache License

/** {@inheritDoc} */
@Override
public int compare(Text val1, long val2Ptr, int val2Len) {
    int len2 = WritableUtils.decodeVIntSize(GridUnsafe.getByte(val2Ptr));

    return HadoopUtils.compareBytes(val1.getBytes(), val1.getLength(), val2Ptr + len2, val2Len - len2);
}
From source file: org.apache.kudu.mapreduce.tools.ImportCsvMapper.java
License: Apache License

/**
 * Convert a line of CSV text into a Kudu Insert
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();

    try {
        CsvParser.ParsedLine parsed = this.parser.parse(lineBytes, value.getLength());

        Insert insert = this.table.newInsert();
        PartialRow row = insert.getRow();
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            String colName = parsed.getColumnName(i);
            ColumnSchema col = this.schema.getColumn(colName);
            String colValue = Bytes.getString(parsed.getLineBytes(), parsed.getColumnOffset(i),
                    parsed.getColumnLength(i));
            switch (col.getType()) {
            case BOOL:
                row.addBoolean(colName, Boolean.parseBoolean(colValue));
                break;
            case INT8:
                row.addByte(colName, Byte.parseByte(colValue));
                break;
            case INT16:
                row.addShort(colName, Short.parseShort(colValue));
                break;
            case INT32:
                row.addInt(colName, Integer.parseInt(colValue));
                break;
            case INT64:
                row.addLong(colName, Long.parseLong(colValue));
                break;
            case STRING:
                row.addString(colName, colValue);
                break;
            case FLOAT:
                row.addFloat(colName, Float.parseFloat(colValue));
                break;
            case DOUBLE:
                row.addDouble(colName, Double.parseDouble(colValue));
                break;
            default:
                throw new IllegalArgumentException("Type " + col.getType() + " not recognized");
            }
        }
        context.write(NULL_KEY, insert);
    } catch (CsvParser.BadCsvLineException badLine) {
        if (this.skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            this.badLineCount.increment(1);
            return;
        } else {
            throw new IOException("Failing task because of a bad line", badLine);
        }
    } catch (IllegalArgumentException e) {
        if (this.skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + e.getMessage());
            this.badLineCount.increment(1);
            return;
        } else {
            throw new IOException("Failing task because of an illegal argument", e);
        }
    } catch (InterruptedException e) {
        throw new IOException("Failing task since it was interrupted", e);
    }
}
From source file: org.apache.kylin.engine.mr.steps.CalculateStatsFromBaseCuboidMapper.java
License: Apache License

@Override
public void doMap(Text key, Text value, Context context) throws InterruptedException, IOException {
    long cuboidID = rowKeyDecoder.decode(key.getBytes());
    if (cuboidID != baseCuboidId) {
        return; // Skip data from cuboids which are not the base cuboid
    }

    List<String> keyValues = rowKeyDecoder.getValues();

    if (rowCount < samplingPercentage) {
        Preconditions.checkArgument(nRowKey == keyValues.size());

        String[] row = keyValues.toArray(new String[keyValues.size()]);

        if (isUsePutRowKeyToHllNewAlgorithm) {
            putRowKeyToHLLNew(row);
        } else {
            putRowKeyToHLLOld(row);
        }
    }

    if (++rowCount == 100)
        rowCount = 0;
}
From source file: org.apache.kylin.engine.mr.steps.CalculateStatsFromBaseCuboidPartitioner.java
License: Apache License

@Override
public int getPartition(Text key, Text value, int numReduceTasks) {
    Long cuboidId = Bytes.toLong(key.getBytes());
    int shard = cuboidId.hashCode() % hllShardBase;
    if (shard < 0) {
        shard += hllShardBase;
    }
    return numReduceTasks - shard - 1;
}
From source file: org.apache.kylin.engine.mr.steps.CalculateStatsFromBaseCuboidReducer.java
License: Apache License

@Override
public void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    long cuboidId = Bytes.toLong(key.getBytes());
    logger.info("Cuboid id to be processed: " + cuboidId);
    for (Text value : values) {
        HLLCounter hll = new HLLCounter(cubeConfig.getCubeStatsHLLPrecision());
        ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
        hll.readRegisters(bf);

        if (cuboidId == baseCuboidId) {
            baseCuboidRowCountInMappers.add(hll.getCountEstimate());
        }

        totalRowsBeforeMerge += hll.getCountEstimate();

        if (cuboidHLLMap.get(cuboidId) != null) {
            cuboidHLLMap.get(cuboidId).merge(hll);
        } else {
            cuboidHLLMap.put(cuboidId, hll);
        }
    }
}
From source file: org.apache.kylin.engine.mr.steps.CuboidReducer.java
License: Apache License

@Override
public void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    aggs.reset();

    for (Text value : values) {
        if (vcounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) {
            logger.info("Handling value with ordinal (This is not KV number!): " + vcounter);
        }
        codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), input);
        aggs.aggregate(input, needAggrMeasures);
    }
    aggs.collectStates(result);

    ByteBuffer valueBuf = codec.encode(result);

    outputValue.set(valueBuf.array(), 0, valueBuf.position());
    context.write(key, outputValue);
}
From source file: org.apache.kylin.engine.mr.steps.FactDistinctColumnPartitioner.java
License: Apache License

@Override
public int getPartition(SelfDefineSortableKey skey, Text value, int numReduceTasks) {
    Text key = skey.getText();
    if (key.getBytes()[0] == FactDistinctColumnsMapper.MARK_FOR_HLL) {
        // the last reducer is for merging hll
        return numReduceTasks - 1;
    } else if (key.getBytes()[0] == FactDistinctColumnsMapper.MARK_FOR_PARTITION_COL) {
        // the last but one reducer is for partition col
        return numReduceTasks - 2;
    } else {
        return BytesUtil.readUnsigned(key.getBytes(), 0, 1);
    }
}
From source file: org.apache.kylin.engine.mr.steps.FactDistinctColumnsReducer.java
License: Apache License

@Override
public void doReduce(SelfDefineSortableKey skey, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    Text key = skey.getText();

    if (isStatistics) {
        // for hll
        long cuboidId = Bytes.toLong(key.getBytes(), 1, Bytes.SIZEOF_LONG);
        for (Text value : values) {
            HLLCounter hll = new HLLCounter(cubeConfig.getCubeStatsHLLPrecision());
            ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
            hll.readRegisters(bf);

            totalRowsBeforeMerge += hll.getCountEstimate();

            if (cuboidId == baseCuboidId) {
                baseCuboidRowCountInMappers.add(hll.getCountEstimate());
            }

            if (cuboidHLLMap.get(cuboidId) != null) {
                cuboidHLLMap.get(cuboidId).merge(hll);
            } else {
                cuboidHLLMap.put(cuboidId, hll);
            }
        }
    } else if (isPartitionCol) {
        // partition col
        String value = Bytes.toString(key.getBytes(), 1, key.getLength() - 1);
        logAFewRows(value);
        long time = DateFormat.stringToMillis(value);
        timeMinValue = Math.min(timeMinValue, time);
        timeMaxValue = Math.max(timeMaxValue, time);
    } else {
        // normal col
        if (buildDictInReducer) {
            String value = Bytes.toString(key.getBytes(), 1, key.getLength() - 1);
            logAFewRows(value);
            builder.addValue(value);
        } else {
            byte[] keyBytes = Bytes.copy(key.getBytes(), 1, key.getLength() - 1);
            // output written to baseDir/colName/-r-00000 (etc)
            String fileName = col.getIdentity() + "/";
            mos.write(BatchConstants.CFG_OUTPUT_COLUMN, NullWritable.get(), new Text(keyBytes), fileName);
        }
    }

    rowCount++;
}
From source file: org.apache.kylin.engine.mr.steps.FilterRecommendCuboidDataMapper.java
License: Apache License

@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidID = rowKeySplitter.split(key.getBytes());
    if (cuboidID != baseCuboid && !recommendCuboids.contains(cuboidID)) {
        return;
    }

    String baseOutputPath = PathNameCuboidOld;
    if (cuboidID == baseCuboid) {
        baseOutputPath = PathNameCuboidBase;
    }
    mos.write(key, value, generateFileName(baseOutputPath));
}