Usage examples for org.apache.hadoop.io.Text#getBytes()
@Override public byte[] getBytes()
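A caveat that runs through all of the examples below: getBytes() exposes Text's internal backing array, and only the first getLength() bytes are valid; the array may carry stale padding from an earlier, longer value. A minimal self-contained sketch of the contract (the class name and sample values are illustrative only):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class GetBytesContract {
    public static void main(String[] args) {
        Text t = new Text("hello, world");
        byte[] hi = "hi".getBytes(StandardCharsets.UTF_8);
        t.set(hi, 0, hi.length); // reuses the existing, larger backing array

        byte[] raw = t.getBytes(); // backing array; may be longer than the value
        int len = t.getLength();   // number of valid bytes

        // Wrong: decodes stale bytes left over from the previous value.
        System.out.println(new String(raw, StandardCharsets.UTF_8));
        // Right: bound the conversion by getLength(); prints "hi".
        System.out.println(new String(raw, 0, len, StandardCharsets.UTF_8));
    }
}

This is why nearly every snippet below passes the (bytes, 0, length) triple, or an equivalent, to whatever consumes the array.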
From source file: hivemall.fm.FFMPredictUDF.java
License: Apache License

@Override
public Object evaluate(DeferredObject[] args) throws HiveException {
    String modelId = _modelIdOI.getPrimitiveJavaObject(args[0].get());
    if (modelId == null) {
        throw new HiveException("modelId is not set");
    }

    final FFMPredictionModel model;
    if (modelId.equals(_cachedModelId)) {
        model = this._cachedModel;
    } else {
        Text serModel = _modelOI.getPrimitiveWritableObject(args[1].get());
        if (serModel == null) {
            throw new HiveException("Model is null for model ID: " + modelId);
        }
        byte[] b = serModel.getBytes();
        final int length = serModel.getLength();
        try {
            model = FFMPredictionModel.deserialize(b, length);
            b = null;
        } catch (ClassNotFoundException e) {
            throw new HiveException(e);
        } catch (IOException e) {
            throw new HiveException(e);
        }
        this._cachedModelId = modelId;
        this._cachedModel = model;
    }

    int numFeatures = model.getNumFeatures();
    int numFields = model.getNumFields();

    Object arg2 = args[2].get();
    // [workaround]
    // java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray
    // cannot be cast to [Ljava.lang.Object;
    if (arg2 instanceof LazyBinaryArray) {
        arg2 = ((LazyBinaryArray) arg2).getList();
    }

    Feature[] x = Feature.parseFFMFeatures(arg2, _featureListOI, _probes, numFeatures, numFields);
    if (x == null || x.length == 0) {
        return null; // return NULL if there are no features
    }
    this._probes = x;

    double predicted = predict(x, model);
    _result.set(predicted);
    return _result;
}
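Two details worth noting: getBytes() is paired with getLength() so that FFMPredictionModel.deserialize(b, length) never reads past the valid bytes, and the local reference b is nulled once deserialization succeeds so the (potentially large) serialized form is dropped as soon as it is no longer needed, while only the deserialized model is cached per modelId.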
From source file: hivemall.mix.MixMessageEncoder.java
License: Open Source License

private static void encodeObject(final Object obj, final ByteBuf buf) throws IOException {
    assert (obj != null);
    if (obj instanceof Integer) {
        Integer i = (Integer) obj;
        buf.writeByte(INTEGER_TYPE);
        buf.writeInt(i.intValue());
    } else if (obj instanceof Text) {
        Text t = (Text) obj;
        byte[] b = t.getBytes();
        int length = t.getLength();
        buf.writeByte(TEXT_TYPE);
        buf.writeInt(length);
        buf.writeBytes(b, 0, length);
    } else if (obj instanceof String) {
        String s = (String) obj;
        buf.writeByte(STRING_TYPE);
        writeString(s, buf);
    } else if (obj instanceof IntWritable) {
        IntWritable i = (IntWritable) obj;
        buf.writeByte(INT_WRITABLE_TYPE);
        buf.writeInt(i.get());
    } else if (obj instanceof LongWritable) {
        LongWritable l = (LongWritable) obj;
        buf.writeByte(LONG_WRITABLE_TYPE);
        buf.writeLong(l.get());
    } else {
        throw new IllegalStateException("Unexpected type: " + obj.getClass().getName());
    }
}
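The encoding is a classic tag-length-value layout: one type-tag byte, then (for Text) a 4-byte length followed by exactly getLength() bytes. Writing length rather than b.length keeps any backing-array padding off the wire and lets the decoder frame the field.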
From source file: hivemall.sketch.bloom.BloomContainsUDF.java
License: Apache License

@Nullable
public Boolean evaluate(@Nullable Text bloomStr, @Nullable Text keyStr) throws HiveException {
    if (bloomStr == null || keyStr == null) {
        return null;
    }
    final Filter bloom;
    if (prevFilter != null && prevKey.equals(bloomStr)) {
        // Same serialized filter as on the previous call: skip re-deserialization.
        bloom = prevFilter;
    } else {
        try {
            bloom = BloomFilterUtils.deserialize(bloomStr, new DynamicBloomFilter());
        } catch (IOException e) {
            throw new HiveException(e);
        }
        this.prevKey = bloomStr;
        this.prevFilter = bloom;
    }
    key.set(keyStr.getBytes(), 1.0d);
    return Boolean.valueOf(bloom.membershipTest(key));
}
From source file: hivemall.sketch.bloom.BloomContainsUDFTest.java
License: Apache License

@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    DynamicBloomFilter dbf = BloomFilterUtils.newDynamicBloomFilter(30);
    final Key key = new Key();
    final Random rnd1 = new Random(seed);
    for (int i = 0; i < size; i++) {
        double d = rnd1.nextGaussian();
        String s = Double.toHexString(d);
        Text t = new Text(s);
        key.set(t.getBytes(), 1.0);
        dbf.add(key);
    }
    return dbf;
}
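One subtlety: Key.set(t.getBytes(), 1.0) hands the bloom Key the raw backing array without trimming it to getLength(), so any padding bytes take part in the hash. That is consistent in this test because both the inserted keys and the probe keys are built from fresh Text objects over the same strings, but pairing getBytes() with getLength(), as the other examples do, is the stricter pattern.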
From source file: hivemall.sketch.bloom.BloomFilterUtils.java
License: Apache License

@Nonnull
public static <F extends Filter> F deserialize(@Nonnull final Text in, @Nonnull final F dst)
        throws IOException {
    return deserialize(in.getBytes(), 0, in.getLength(), dst);
}
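A hypothetical round-trip showing the call site; BloomFilterUtils.serialize is assumed here as the counterpart that produced the Text in the first place:

DynamicBloomFilter dbf = BloomFilterUtils.newDynamicBloomFilter(30);
dbf.add(new Key("item".getBytes(StandardCharsets.UTF_8)));
Text serialized = BloomFilterUtils.serialize(dbf); // assumed serializer counterpart
DynamicBloomFilter restored =
        BloomFilterUtils.deserialize(serialized, new DynamicBloomFilter());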
From source file: hivemall.tools.compress.DeflateUDF.java
License: Apache License

@Override
public BytesWritable evaluate(DeferredObject[] arguments) throws HiveException {
    if (codec == null) {
        this.codec = new DeflateCodec(true, false);
    }
    Object arg0 = arguments[0].get();
    if (arg0 == null) {
        return null;
    }
    Text text = stringOI.getPrimitiveWritableObject(arg0);
    byte[] original = text.getBytes();
    final int len = text.getLength();
    final byte[] compressed;
    try {
        compressed = codec.compress(original, 0, len, compressionLevel);
    } catch (IOException e) {
        throw new HiveException("Failed to compress", e);
    }
    original = null;
    if (result == null) {
        this.result = new BytesWritable(compressed);
    } else {
        result.set(compressed, 0, compressed.length);
    }
    return result;
}
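Beyond the getBytes()/getLength() pairing passed to the codec, note the object-reuse idiom: a single BytesWritable is allocated on the first call and refilled with set() afterwards, avoiding one allocation per input row.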
From source file: hivemall.tools.text.Unbase91UDF.java
License: Apache License

@Override
public BytesWritable evaluate(DeferredObject[] arguments) throws HiveException {
    if (outputBuf == null) {
        this.outputBuf = new FastByteArrayOutputStream(4096);
    } else {
        outputBuf.reset();
    }
    Object arg0 = arguments[0].get();
    if (arg0 == null) {
        return null;
    }
    Text input = stringOI.getPrimitiveWritableObject(arg0);
    final byte[] inputBytes = input.getBytes();
    final int len = input.getLength();
    try {
        Base91.decode(inputBytes, 0, len, outputBuf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    if (result == null) {
        byte[] outputBytes = outputBuf.toByteArray();
        this.result = new BytesWritable(outputBytes);
    } else {
        byte[] outputBytes = outputBuf.getInternalArray();
        int outputSize = outputBuf.size();
        result.set(outputBytes, 0, outputSize);
    }
    return result;
}
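The same reuse idiom, applied on both ends: the FastByteArrayOutputStream is reset and refilled per call, and after the first call getInternalArray()/size() feed BytesWritable.set directly, skipping the defensive copy that toByteArray() makes.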
From source file: hivemall.utils.hadoop.JsonSerdeUtils.java
License: Apache License

@SuppressWarnings("unchecked")
@Nonnull
public static <T> T deserialize(@Nonnull final Text t, @Nullable final List<String> columnNames,
        @Nullable final List<TypeInfo> columnTypes) throws SerDeException {
    final Object result;
    try {
        // Bound the stream by getLength(): getBytes() returns the backing array,
        // which may contain stale bytes beyond the valid length.
        JsonParser p = new JsonFactory()
                .createJsonParser(new FastByteArrayInputStream(t.getBytes(), t.getLength()));
        final JsonToken token = p.nextToken();
        if (token == JsonToken.START_OBJECT) {
            result = parseObject(p, columnNames, columnTypes);
        } else if (token == JsonToken.START_ARRAY) {
            result = parseArray(p, columnTypes);
        } else {
            result = parseValue(p);
        }
    } catch (JsonParseException e) {
        throw new SerDeException(e);
    } catch (IOException e) {
        throw new SerDeException(e);
    }
    return (T) result;
}
From source file: io.aos.hdfs.TextIterator.java
License: Apache License

public static void main(String... args) {
    Text t = new Text("\u0041\u00DF\u6771\uD801\uDC00");
    ByteBuffer buf = ByteBuffer.wrap(t.getBytes(), 0, t.getLength());
    int cp;
    while (buf.hasRemaining() && (cp = Text.bytesToCodePoint(buf)) != -1) {
        System.out.println(Integer.toHexString(cp));
    }
}
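For this input the loop prints 41, df, 6771 and 10400: Text.bytesToCodePoint walks the UTF-8 bytes and consumes the surrogate pair \uD801\uDC00 as the single supplementary code point U+10400.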
From source file: io.aos.hdfs.TextTest.java
License: Apache License

@Test
public void test() throws IOException {
    Text t = new Text("hadoop");
    assertThat(t.getLength(), is(6));
    assertThat(t.getBytes().length, is(6));
    assertThat(t.charAt(2), is((int) 'd'));
    assertThat("Out of bounds", t.charAt(100), is(-1));
}
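A short companion test in the same style, showing where byte length and Java char count diverge once the content is non-ASCII (the method name is illustrative):

@Test
public void testMultiByte() throws IOException {
    Text t = new Text("\u00DF");          // LATIN SMALL LETTER SHARP S
    assertThat(t.getLength(), is(2));     // two UTF-8 bytes...
    assertThat("\u00DF".length(), is(1)); // ...but a single Java char
    assertThat(t.charAt(0), is(0xDF));    // charAt takes a byte offset and returns a code point
}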