List of usage examples for org.apache.hadoop.io Text getBytes
@Override public byte[] getBytes()
From source file:com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java
License:Apache License
static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, boolean invert) throws SerDeException { // Is this field a null? if (o == null) { buffer.write((byte) 0, invert); return;/*w w w .j a v a 2 s . c om*/ } // This field is not a null. buffer.write((byte) 1, invert); switch (oi.getCategory()) { case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; switch (poi.getPrimitiveCategory()) { case VOID: { return; } case BOOLEAN: { boolean v = ((BooleanObjectInspector) poi).get(o); buffer.write((byte) (v ? 2 : 1), invert); return; } case BYTE: { ByteObjectInspector boi = (ByteObjectInspector) poi; byte v = boi.get(o); buffer.write((byte) (v ^ 0x80), invert); return; } case SHORT: { ShortObjectInspector spoi = (ShortObjectInspector) poi; short v = spoi.get(o); buffer.write((byte) ((v >> 8) ^ 0x80), invert); buffer.write((byte) v, invert); return; } case INT: { IntObjectInspector ioi = (IntObjectInspector) poi; int v = ioi.get(o); serializeInt(buffer, v, invert); return; } case LONG: { LongObjectInspector loi = (LongObjectInspector) poi; long v = loi.get(o); buffer.write((byte) ((v >> 56) ^ 0x80), invert); buffer.write((byte) (v >> 48), invert); buffer.write((byte) (v >> 40), invert); buffer.write((byte) (v >> 32), invert); buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case FLOAT: { FloatObjectInspector foi = (FloatObjectInspector) poi; int v = Float.floatToIntBits(foi.get(o)); if ((v & (1 << 31)) != 0) { // negative number, flip all bits v = ~v; } else { // positive number, flip the first bit v = v ^ (1 << 31); } buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case DOUBLE: { DoubleObjectInspector doi = (DoubleObjectInspector) poi; long v = Double.doubleToLongBits(doi.get(o)); if ((v & (1L << 63)) != 0) { // negative number, flip all bits v = ~v; } else { // positive number, flip the first bit v = v ^ (1L << 63); } buffer.write((byte) (v >> 56), invert); buffer.write((byte) (v >> 48), invert); buffer.write((byte) (v >> 40), invert); buffer.write((byte) (v >> 32), invert); buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case STRING: { StringObjectInspector soi = (StringObjectInspector) poi; Text t = soi.getPrimitiveWritableObject(o); serializeBytes(buffer, t.getBytes(), t.getLength(), invert); return; } case VARCHAR: { HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi; HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o); // use varchar's text field directly Text t = hc.getTextValue(); serializeBytes(buffer, t.getBytes(), t.getLength(), invert); return; } case BINARY: { BinaryObjectInspector baoi = (BinaryObjectInspector) poi; BytesWritable ba = baoi.getPrimitiveWritableObject(o); byte[] toSer = new byte[ba.getLength()]; System.arraycopy(ba.getBytes(), 0, toSer, 0, ba.getLength()); serializeBytes(buffer, toSer, ba.getLength(), invert); return; } case DATE: { DateObjectInspector doi = (DateObjectInspector) poi; int v = doi.getPrimitiveWritableObject(o).getDays(); serializeInt(buffer, v, invert); return; } case TIMESTAMP: { TimestampObjectInspector toi = (TimestampObjectInspector) poi; TimestampWritable t = toi.getPrimitiveWritableObject(o); byte[] data = t.getBinarySortable(); for (int i = 0; i < data.length; i++) { buffer.write(data[i], invert); } return; } case DECIMAL: { // decimals are encoded in three pieces: // sign: 1, 2 or 3 for smaller, equal or larger than 0 respectively // factor: Number that indicates the amount of digits you have to move // the decimal point left or right until the resulting number is smaller // than zero but has something other than 0 as the first digit. // digits: which is a string of all the digits in the decimal. If the number // is negative the binary string will be inverted to get the correct ordering. // Example: 0.00123 // Sign is 3 (bigger than 0) // Factor is -2 (move decimal point 2 positions right) // Digits are: 123 HiveDecimalObjectInspector boi = (HiveDecimalObjectInspector) poi; HiveDecimal dec = boi.getPrimitiveJavaObject(o); // get the sign of the big decimal int sign = dec.compareTo(HiveDecimal.ZERO); // we'll encode the absolute value (sign is separate) dec = dec.abs(); // get the scale factor to turn big decimal into a decimal < 1 int factor = dec.precision() - dec.scale(); factor = sign == 1 ? factor : -factor; // convert the absolute big decimal to string dec.scaleByPowerOfTen(Math.abs(dec.scale())); String digits = dec.unscaledValue().toString(); // finally write out the pieces (sign, scale, digits) buffer.write((byte) (sign + 1), invert); buffer.write((byte) ((factor >> 24) ^ 0x80), invert); buffer.write((byte) (factor >> 16), invert); buffer.write((byte) (factor >> 8), invert); buffer.write((byte) factor, invert); serializeBytes(buffer, digits.getBytes(decimalCharSet), digits.length(), sign == -1 ? !invert : invert); return; } default: { throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory()); } } } case LIST: { ListObjectInspector loi = (ListObjectInspector) oi; ObjectInspector eoi = loi.getListElementObjectInspector(); // \1 followed by each element int size = loi.getListLength(o); for (int eid = 0; eid < size; eid++) { buffer.write((byte) 1, invert); serialize(buffer, loi.getListElement(o, eid), eoi, invert); } // and \0 to terminate buffer.write((byte) 0, invert); return; } case MAP: { MapObjectInspector moi = (MapObjectInspector) oi; ObjectInspector koi = moi.getMapKeyObjectInspector(); ObjectInspector voi = moi.getMapValueObjectInspector(); // \1 followed by each key and then each value Map<?, ?> map = moi.getMap(o); for (Map.Entry<?, ?> entry : map.entrySet()) { buffer.write((byte) 1, invert); serialize(buffer, entry.getKey(), koi, invert); serialize(buffer, entry.getValue(), voi, invert); } // and \0 to terminate buffer.write((byte) 0, invert); return; } case STRUCT: { StructObjectInspector soi = (StructObjectInspector) oi; List<? extends StructField> fields = soi.getAllStructFieldRefs(); for (int i = 0; i < fields.size(); i++) { serialize(buffer, soi.getStructFieldData(o, fields.get(i)), fields.get(i).getFieldObjectInspector(), invert); } return; } case UNION: { UnionObjectInspector uoi = (UnionObjectInspector) oi; byte tag = uoi.getTag(o); buffer.write(tag, invert); serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert); return; } default: { throw new RuntimeException("Unrecognized type: " + oi.getCategory()); } } }
From source file:com.ebay.nest.io.sede.lazy.LazyArray.java
License:Apache License
/** * Get the element without checking out-of-bound index. *//* ww w.j a v a 2 s . co m*/ private Object uncheckedGetElement(int index) { if (elementInited[index]) { return arrayElements[index] == null ? null : arrayElements[index].getObject(); } elementInited[index] = true; Text nullSequence = oi.getNullSequence(); int elementLength = startPosition[index + 1] - startPosition[index] - 1; if (elementLength == nullSequence.getLength() && 0 == LazyUtils.compare(bytes.getData(), startPosition[index], elementLength, nullSequence.getBytes(), 0, nullSequence.getLength())) { return arrayElements[index] = null; } arrayElements[index] = LazyFactory.createLazyObject(oi.getListElementObjectInspector()); arrayElements[index].init(bytes, startPosition[index], elementLength); return arrayElements[index].getObject(); }
From source file:com.ebay.nest.io.sede.lazy.LazyMap.java
License:Apache License
/** * Get the value object with the index without checking parsed. * * @param index//from w w w .ja va2 s .c o m * The index into the array starting from 0 */ private LazyObject uncheckedGetValue(int index) { if (valueInited[index]) { return valueObjects[index]; } valueInited[index] = true; Text nullSequence = oi.getNullSequence(); int valueIBegin = keyEnd[index] + 1; int valueILength = keyStart[index + 1] - 1 - valueIBegin; if (valueILength < 0 || ((valueILength == nullSequence.getLength()) && 0 == LazyUtils.compare(bytes.getData(), valueIBegin, valueILength, nullSequence.getBytes(), 0, nullSequence.getLength()))) { return valueObjects[index] = null; } valueObjects[index] = LazyFactory.createLazyObject(oi.getMapValueObjectInspector()); valueObjects[index].init(bytes, valueIBegin, valueILength); return valueObjects[index]; }
From source file:com.ebay.nest.io.sede.lazy.LazyMap.java
License:Apache License
/** * Get the key object with the index without checking parsed. * * @param index/* w w w . j a v a2 s .c om*/ * The index into the array starting from 0 */ private LazyPrimitive<?, ?> uncheckedGetKey(int index) { if (keyInited[index]) { return keyObjects[index]; } keyInited[index] = true; Text nullSequence = oi.getNullSequence(); int keyIBegin = keyStart[index]; int keyILength = keyEnd[index] - keyStart[index]; if (keyILength < 0 || ((keyILength == nullSequence.getLength()) && 0 == LazyUtils.compare(bytes.getData(), keyIBegin, keyILength, nullSequence.getBytes(), 0, nullSequence.getLength()))) { return keyObjects[index] = null; } // Keys are always primitive keyObjects[index] = LazyFactory .createLazyPrimitiveClass((PrimitiveObjectInspector) oi.getMapKeyObjectInspector()); keyObjects[index].init(bytes, keyIBegin, keyILength); return keyObjects[index]; }
From source file:com.ebay.nest.io.sede.lazy.LazySimpleSerDe.java
License:Apache License
/** * Deserialize a row from the Writable to a LazyObject. * * @param field//from ww w . j a v a 2 s . c o m * the Writable that contains the data * @return The deserialized row Object. * @see SerDe#deserialize(Writable) */ @Override public Object deserialize(Writable field) throws SerDeException { if (byteArrayRef == null) { byteArrayRef = new ByteArrayRef(); } if (field instanceof BytesWritable) { BytesWritable b = (BytesWritable) field; // For backward-compatibility with hadoop 0.17 byteArrayRef.setData(b.getBytes()); cachedLazyStruct.init(byteArrayRef, 0, b.getLength()); } else if (field instanceof Text) { Text t = (Text) field; byteArrayRef.setData(t.getBytes()); cachedLazyStruct.init(byteArrayRef, 0, t.getLength()); } else { throw new SerDeException(getClass().toString() + ": expects either BytesWritable or Text object!"); } lastOperationSerialize = false; lastOperationDeserialize = true; return cachedLazyStruct; }
From source file:com.ebay.nest.io.sede.lazy.LazySimpleSerDe.java
License:Apache License
/** * Serialize the row into the StringBuilder. * * @param out// w ww . j a v a 2 s . c o m * The StringBuilder to store the serialized data. * @param obj * The object for the current field. * @param objInspector * The ObjectInspector for the current Object. * @param separators * The separators array. * @param level * The current level of separator. * @param nullSequence * The byte sequence representing the NULL value. * @param escaped * Whether we need to escape the data when writing out * @param escapeChar * Which char to use as the escape char, e.g. '\\' * @param needsEscape * Which chars needs to be escaped. This array should have size of * 128. Negative byte values (or byte values >= 128) are never * escaped. * @throws IOException * @throws SerDeException */ public static void serialize(ByteStream.Output out, Object obj, ObjectInspector objInspector, byte[] separators, int level, Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException, SerDeException { if (obj == null) { out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); return; } char separator; List<?> list; switch (objInspector.getCategory()) { case PRIMITIVE: LazyUtils.writePrimitiveUTF8(out, obj, (PrimitiveObjectInspector) objInspector, escaped, escapeChar, needsEscape); return; case LIST: separator = (char) LazyUtils.getSeparator(separators, level); ListObjectInspector loi = (ListObjectInspector) objInspector; list = loi.getList(obj); ObjectInspector eoi = loi.getListElementObjectInspector(); if (list == null) { out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); } else { for (int i = 0; i < list.size(); i++) { if (i > 0) { out.write(separator); } serialize(out, list.get(i), eoi, separators, level + 1, nullSequence, escaped, escapeChar, needsEscape); } } return; case MAP: separator = (char) LazyUtils.getSeparator(separators, level); char keyValueSeparator = (char) LazyUtils.getSeparator(separators, level + 1); MapObjectInspector moi = (MapObjectInspector) objInspector; ObjectInspector koi = moi.getMapKeyObjectInspector(); ObjectInspector voi = moi.getMapValueObjectInspector(); Map<?, ?> map = moi.getMap(obj); if (map == null) { out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); } else { boolean first = true; for (Map.Entry<?, ?> entry : map.entrySet()) { if (first) { first = false; } else { out.write(separator); } serialize(out, entry.getKey(), koi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape); out.write(keyValueSeparator); serialize(out, entry.getValue(), voi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape); } } return; case STRUCT: separator = (char) LazyUtils.getSeparator(separators, level); StructObjectInspector soi = (StructObjectInspector) objInspector; List<? extends StructField> fields = soi.getAllStructFieldRefs(); list = soi.getStructFieldsDataAsList(obj); if (list == null) { out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); } else { for (int i = 0; i < list.size(); i++) { if (i > 0) { out.write(separator); } serialize(out, list.get(i), fields.get(i).getFieldObjectInspector(), separators, level + 1, nullSequence, escaped, escapeChar, needsEscape); } } return; case UNION: separator = (char) LazyUtils.getSeparator(separators, level); UnionObjectInspector uoi = (UnionObjectInspector) objInspector; List<? extends ObjectInspector> ois = uoi.getObjectInspectors(); if (ois == null) { out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); } else { LazyUtils.writePrimitiveUTF8(out, new Byte(uoi.getTag(obj)), PrimitiveObjectInspectorFactory.javaByteObjectInspector, escaped, escapeChar, needsEscape); out.write(separator); serialize(out, uoi.getField(obj), ois.get(uoi.getTag(obj)), separators, level + 1, nullSequence, escaped, escapeChar, needsEscape); } return; default: break; } throw new RuntimeException("Unknown category type: " + objInspector.getCategory()); }
From source file:com.ebay.nest.io.sede.lazy.LazyStruct.java
License:Apache License
/** * Get the field out of the row without checking parsed. This is called by * both getField and getFieldsAsList./*w w w. j av a2 s . c om*/ * * @param fieldID * The id of the field starting from 0. * @param nullSequence * The sequence representing NULL value. * @return The value of the field */ private Object uncheckedGetField(int fieldID) { Text nullSequence = oi.getNullSequence(); // Test the length first so in most cases we avoid doing a byte[] // comparison. int fieldByteBegin = startPosition[fieldID]; int fieldLength = startPosition[fieldID + 1] - startPosition[fieldID] - 1; if ((fieldLength < 0) || (fieldLength == nullSequence.getLength() && LazyUtils.compare(bytes.getData(), fieldByteBegin, fieldLength, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0)) { return null; } if (!fieldInited[fieldID]) { fieldInited[fieldID] = true; fields[fieldID].init(bytes, fieldByteBegin, fieldLength); } return fields[fieldID].getObject(); }
From source file:com.ebay.nest.io.sede.lazy.LazyUnion.java
License:Apache License
/** * Get the field out of the row without checking parsed. * * @return The value of the field//from w ww . j a va 2 s. com */ private Object uncheckedGetField() { Text nullSequence = oi.getNullSequence(); int fieldLength = start + length - startPosition; if (fieldLength != 0 && fieldLength == nullSequence.getLength() && LazyUtils.compare(bytes.getData(), startPosition, fieldLength, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) { return null; } if (!fieldInited) { fieldInited = true; field.init(bytes, startPosition, fieldLength); } return field.getObject(); }
From source file:com.ebay.nest.io.sede.lazy.LazyUtils.java
License:Apache License
/** * Write out the text representation of a Primitive Object to a UTF8 byte * stream./*from w w w .j a v a 2 s . c o m*/ * * @param out * The UTF8 byte OutputStream * @param o * The primitive Object * @param needsEscape * Whether a character needs escaping. This array should have size of * 128. */ public static void writePrimitiveUTF8(OutputStream out, Object o, PrimitiveObjectInspector oi, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException { switch (oi.getPrimitiveCategory()) { case BOOLEAN: { boolean b = ((BooleanObjectInspector) oi).get(o); if (b) { out.write(trueBytes, 0, trueBytes.length); } else { out.write(falseBytes, 0, falseBytes.length); } break; } case BYTE: { LazyInteger.writeUTF8(out, ((ByteObjectInspector) oi).get(o)); break; } case SHORT: { LazyInteger.writeUTF8(out, ((ShortObjectInspector) oi).get(o)); break; } case INT: { LazyInteger.writeUTF8(out, ((IntObjectInspector) oi).get(o)); break; } case LONG: { LazyLong.writeUTF8(out, ((LongObjectInspector) oi).get(o)); break; } case FLOAT: { float f = ((FloatObjectInspector) oi).get(o); ByteBuffer b = Text.encode(String.valueOf(f)); out.write(b.array(), 0, b.limit()); break; } case DOUBLE: { double d = ((DoubleObjectInspector) oi).get(o); ByteBuffer b = Text.encode(String.valueOf(d)); out.write(b.array(), 0, b.limit()); break; } case STRING: { Text t = ((StringObjectInspector) oi).getPrimitiveWritableObject(o); writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape); break; } case VARCHAR: { HiveVarcharWritable hc = ((HiveVarcharObjectInspector) oi).getPrimitiveWritableObject(o); Text t = hc.getTextValue(); writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape); break; } case BINARY: { BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o); byte[] toEncode = new byte[bw.getLength()]; System.arraycopy(bw.getBytes(), 0, toEncode, 0, bw.getLength()); byte[] toWrite = Base64.encodeBase64(toEncode); out.write(toWrite, 0, toWrite.length); break; } case DATE: { LazyDate.writeUTF8(out, ((DateObjectInspector) oi).getPrimitiveWritableObject(o)); break; } case TIMESTAMP: { LazyTimestamp.writeUTF8(out, ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o)); break; } case DECIMAL: { HiveDecimal bd = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); ByteBuffer b = Text.encode(bd.toString()); out.write(b.array(), 0, b.limit()); break; } default: { throw new RuntimeException("Hive internal error."); } } }
From source file:com.facebook.presto.accumulo.examples.TpcHClerkSearch.java
License:Apache License
@Override public int run(AccumuloConfig config, CommandLine cmd) throws Exception { String[] searchTerms = cmd.getOptionValues(CLERK_ID); ZooKeeperInstance inst = new ZooKeeperInstance(config.getInstance(), config.getZooKeepers()); Connector conn = inst.getConnector(config.getUsername(), new PasswordToken(config.getPassword())); // Ensure both tables exists validateExists(conn, DATA_TABLE);//from w w w.j a v a 2 s . c o m validateExists(conn, INDEX_TABLE); long start = System.currentTimeMillis(); // Create a scanner against the index table BatchScanner idxScanner = conn.createBatchScanner(INDEX_TABLE, new Authorizations(), 10); LinkedList<Range> searchRanges = new LinkedList<Range>(); // Create a search Range from the command line args for (String searchTerm : searchTerms) { if (clerkRegex.matcher(searchTerm).matches()) { searchRanges.add(new Range(searchTerm)); } else { throw new InvalidParameterException( format("Search term %s does not match regex Clerk#[0-9]{9}", searchTerm)); } } // Set the search ranges for our scanner idxScanner.setRanges(searchRanges); // A list to hold all of the order IDs LinkedList<Range> orderIds = new LinkedList<Range>(); String orderId; // Process all of the records returned by the batch scanner for (Map.Entry<Key, Value> record : idxScanner) { // Get the order ID and add it to the list of order IDs orderIds.add(new Range(record.getKey().getColumnQualifier())); } // Close the batch scanner idxScanner.close(); // If clerkIDs is empty, log a message and return 0 if (orderIds.isEmpty()) { System.out.println("Found no orders with the given Clerk ID(s)"); return 0; } else { System.out.println(format("Searching data table for %d orders", orderIds.size())); } // Initialize the batch scanner to scan the data table with // the previously found order IDs as the ranges BatchScanner dataScanner = conn.createBatchScanner(DATA_TABLE, new Authorizations(), 10); dataScanner.setRanges(orderIds); dataScanner.addScanIterator(new IteratorSetting(1, WholeRowIterator.class)); Text row = new Text(); // The row ID Text colQual = new Text(); // The column qualifier of the current record Long orderkey = null; Long custkey = null; String orderstatus = null; Double totalprice = null; Date orderdate = null; String orderpriority = null; String clerk = null; Long shippriority = null; String comment = null; int numTweets = 0; // Process all of the records returned by the batch scanner for (Map.Entry<Key, Value> entry : dataScanner) { entry.getKey().getRow(row); orderkey = decode(Long.class, row.getBytes(), row.getLength()); SortedMap<Key, Value> rowMap = WholeRowIterator.decodeRow(entry.getKey(), entry.getValue()); for (Map.Entry<Key, Value> record : rowMap.entrySet()) { // Get the column qualifier from the record's key record.getKey().getColumnQualifier(colQual); switch (colQual.toString()) { case CUSTKEY_STR: custkey = decode(Long.class, record.getValue().get()); break; case ORDERSTATUS_STR: orderstatus = decode(String.class, record.getValue().get()); break; case TOTALPRICE_STR: totalprice = decode(Double.class, record.getValue().get()); break; case ORDERDATE_STR: orderdate = decode(Date.class, record.getValue().get()); break; case ORDERPRIORITY_STR: orderpriority = decode(String.class, record.getValue().get()); break; case CLERK_STR: clerk = decode(String.class, record.getValue().get()); break; case SHIPPRIORITY_STR: shippriority = decode(Long.class, record.getValue().get()); break; case COMMENT_STR: comment = decode(String.class, record.getValue().get()); break; default: throw new RuntimeException("Unknown column qualifier " + colQual); } } ++numTweets; // Write the screen name and text to stdout System.out.println(format("%d|%d|%s|%f|%s|%s|%s|%d|%s", orderkey, custkey, orderstatus, totalprice, orderdate, orderpriority, clerk, shippriority, comment)); custkey = null; shippriority = null; orderstatus = null; orderpriority = null; clerk = null; comment = null; totalprice = null; orderdate = null; } // Close the batch scanner dataScanner.close(); long finish = System.currentTimeMillis(); System.out.format("Found %d orders in %s ms\n", numTweets, (finish - start)); return 0; }