List of usage examples for org.apache.hadoop.io.Text.set
public void set(byte[] utf8, int start, int len)
From source file:TweetTweetTweet.java
License:Open Source License
@Override public void fromText(Text text) { tweet1.fromText(text);/* w w w. j a v a2 s . c o m*/ // Skip the Tab text.set(text.getBytes(), 1, text.getLength() - 1); tweet2.fromText(text); // Skip the Tab text.set(text.getBytes(), 1, text.getLength() - 1); tweet3.fromText(text); }
From source file:TestString.java
License:Apache License
/**
 * Micro-benchmark: repeatedly extracts two byte ranges of a {@link Text} via
 * {@code Text.set(byte[], int, int)} and prints the results and the elapsed time.
 */
@Test
public void testTextSubstring() throws Exception {
    final Text source = new Text("string");
    final Text head = new Text();
    final Text tail = new Text();

    final long startNanos = System.nanoTime();
    int remaining = 100000000;
    while (remaining-- > 0) {
        // First two bytes, then everything from byte offset 3 onwards.
        head.set(source.getBytes(), 0, 2);
        tail.set(source.getBytes(), 3, source.getLength() - 3);
    }
    final long elapsedNanos = System.nanoTime() - startNanos;

    System.out.println("TestTextSubString");
    System.out.println("text1: " + head.toString());
    System.out.println("text2: " + tail.toString());
    System.out.println("Elapsed Time: " + elapsedNanos / 1000000000f + " seconds.");
}
From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License:Apache License
/**
 * Extracts one delimiter-separated column from a record.
 *
 * <p>Columns are numbered from 1. The last column of a record does not need to be
 * followed by a delimiter: when no further delimiter is found, the column extends to
 * the end of {@code val}.
 *
 * @param val the record to split
 * @param column the 1-based index of the column to return
 * @param delimiter the column separator; must be non-empty
 * @return a new {@link Text} holding the bytes of the requested column
 * @throws IOException if {@code delimiter} is null or empty, if {@code column} is less
 *     than 1, or if the record has fewer than {@code column} columns
 */
public Text getColumn(Text val, int column, String delimiter) throws IOException {
    if (delimiter == null || delimiter.equals("")) {
        throw new IOException("Value of delimiter is empty");
    }
    if (column < 1) {
        throw new IOException("Column index must be 1 or greater but was " + column);
    }

    // Text.find reports byte offsets, so the delimiter has to be measured in encoded
    // bytes rather than in chars (the two differ for non-ASCII delimiters).
    int delimiterLength = new Text(delimiter).getLength();

    int fieldStart = 0;
    int fieldLength = 0;
    int nextFieldStart = 0;
    for (int i = 0; i < column; i++) {
        fieldStart = nextFieldStart;
        int delimiterAt = val.find(delimiter, fieldStart);
        if (delimiterAt < 0) {
            if (i < column - 1) {
                throw new IOException("Record has only " + (i + 1)
                        + " column(s) but column " + column + " was requested");
            }
            // The requested column is the last one and has no trailing delimiter.
            delimiterAt = val.getLength();
        }
        fieldLength = delimiterAt - fieldStart;
        nextFieldStart = delimiterAt + delimiterLength;
    }
    logger.debug("text value is: " + val);

    Text keyColumn = new Text();
    keyColumn.set(val.getBytes(), fieldStart, fieldLength);
    return keyColumn;
}
From source file:com.asakusafw.runtime.value.StringOptionUtil.java
License:Apache License
/** * Trims the leading/trailing classical whitespace characters in the {@link StringOption}. * This only removes the following characters: * <ul>/*from w w w .jav a 2 s .c om*/ * <li> {@code "\t" (HT:U+0009)} </li> * <li> {@code "\n" (LF:U+000a)} </li> * <li> {@code "\r" (CR:U+000d)} </li> * <li> {@code " " (SP:U+0020)} </li> * </ul> * This directly modifies the target {@link StringOption}. * @param option the target {@link StringOption} * @throws NullPointerException if the {@link StringOption} is/represents {@code null} */ public static void trim(StringOption option) { Text text = option.get(); byte[] bytes = text.getBytes(); int length = text.getLength(); int start = 0; int last = length - 1; for (; start <= last; start++) { if (isTrimTarget(bytes[start]) == false) { break; } } for (; last >= start; last--) { if (isTrimTarget(bytes[last]) == false) { break; } } if (start == 0 && last == length - 1) { return; } text.set(bytes, start, last + 1 - start); }
From source file:com.datasalt.utils.mapred.joiner.MultiJoinReducer.java
License:Apache License
public Text deserializeKey(MultiJoinPair pair, Text obj) throws IOException { obj.set(pair.getMultiJoinGroup().getBytes(), 0, pair.getMultiJoinGroup().getLength()); return obj;//from w w w .j a v a2 s . c o m }
From source file:com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java
License:Apache License
static Text deserializeText(InputByteBuffer buffer, boolean invert, Text r) throws IOException { // Get the actual length first int start = buffer.tell(); int length = 0; do {/* ww w . j ava 2 s.c om*/ byte b = buffer.read(invert); if (b == 0) { // end of string break; } if (b == 1) { // the last char is an escape char. read the actual char buffer.read(invert); } length++; } while (true); if (length == buffer.tell() - start) { // No escaping happened, so we are already done. r.set(buffer.getData(), start, length); } else { // Escaping happened, we need to copy byte-by-byte. // 1. Set the length first. r.set(buffer.getData(), start, length); // 2. Reset the pointer. buffer.seek(start); // 3. Copy the data. byte[] rdata = r.getBytes(); for (int i = 0; i < length; i++) { byte b = buffer.read(invert); if (b == 1) { // The last char is an escape char, read the actual char. // The serialization format escape \0 to \1, and \1 to \2, // to make sure the string is null-terminated. b = (byte) (buffer.read(invert) - 1); } rdata[i] = b; } // 4. Read the null terminator. byte b = buffer.read(invert); assert (b == 0); } return r; }
From source file:com.ebay.nest.io.sede.lazybinary.LazyBinaryHiveVarchar.java
License:Apache License
@Override public void init(ByteArrayRef bytes, int start, int length) { // re-use existing text member in varchar writable Text textValue = data.getTextValue(); textValue.set(bytes.getData(), start, length); data.enforceMaxLength(maxLength);//ww w. ja va2s. c om }
From source file:com.ebay.nest.io.sede.SerDeUtils.java
License:Apache License
/**
 * Appends a JSON-style rendering of {@code o} to {@code sb}, dispatching on the
 * category reported by {@code oi}.
 *
 * <p>{@code nullStr} is used only when {@code o} itself is null; nested values are
 * rendered with {@code JSON_NULL}.
 *
 * @param sb the builder to append to
 * @param o the object to render; may be null
 * @param oi the inspector describing {@code o}
 * @param nullStr the text to append when {@code o} is null
 * @throws RuntimeException if the category or primitive category is not handled
 */
static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {
    switch (oi.getCategory()) {
    case PRIMITIVE:
        appendPrimitiveJson(sb, o, (PrimitiveObjectInspector) oi, nullStr);
        break;
    case LIST:
        appendListJson(sb, o, (ListObjectInspector) oi, nullStr);
        break;
    case MAP:
        appendMapJson(sb, o, (MapObjectInspector) oi, nullStr);
        break;
    case STRUCT:
        appendStructJson(sb, o, (StructObjectInspector) oi, nullStr);
        break;
    case UNION:
        appendUnionJson(sb, o, (UnionObjectInspector) oi, nullStr);
        break;
    default:
        throw new RuntimeException("Unknown type in ObjectInspector!");
    }
}

/** Renders a primitive value; string-like, date and timestamp values are double-quoted. */
private static void appendPrimitiveJson(StringBuilder sb, Object o, PrimitiveObjectInspector poi,
        String nullStr) {
    if (o == null) {
        sb.append(nullStr);
        return;
    }
    switch (poi.getPrimitiveCategory()) {
    case BOOLEAN:
        sb.append(((BooleanObjectInspector) poi).get(o) ? "true" : "false");
        break;
    case BYTE:
        sb.append(((ByteObjectInspector) poi).get(o));
        break;
    case SHORT:
        sb.append(((ShortObjectInspector) poi).get(o));
        break;
    case INT:
        sb.append(((IntObjectInspector) poi).get(o));
        break;
    case LONG:
        sb.append(((LongObjectInspector) poi).get(o));
        break;
    case FLOAT:
        sb.append(((FloatObjectInspector) poi).get(o));
        break;
    case DOUBLE:
        sb.append(((DoubleObjectInspector) poi).get(o));
        break;
    case STRING:
        sb.append('"');
        sb.append(escapeString(((StringObjectInspector) poi).getPrimitiveJavaObject(o)));
        sb.append('"');
        break;
    case VARCHAR:
        sb.append('"');
        sb.append(escapeString(((HiveVarcharObjectInspector) poi).getPrimitiveJavaObject(o).toString()));
        sb.append('"');
        break;
    case DATE:
        sb.append('"');
        sb.append(((DateObjectInspector) poi).getPrimitiveWritableObject(o));
        sb.append('"');
        break;
    case TIMESTAMP:
        sb.append('"');
        sb.append(((TimestampObjectInspector) poi).getPrimitiveWritableObject(o));
        sb.append('"');
        break;
    case BINARY: {
        // Decode the valid portion of the binary value through a Text; appended unquoted.
        BytesWritable binary = ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o);
        Text decoded = new Text();
        decoded.set(binary.getBytes(), 0, binary.getLength());
        sb.append(decoded.toString());
        break;
    }
    case DECIMAL:
        sb.append(((HiveDecimalObjectInspector) poi).getPrimitiveJavaObject(o));
        break;
    default:
        throw new RuntimeException("Unknown primitive type: " + poi.getPrimitiveCategory());
    }
}

/** Renders a list as comma-separated elements between LBRACKET and RBRACKET. */
private static void appendListJson(StringBuilder sb, Object o, ListObjectInspector loi,
        String nullStr) {
    ObjectInspector elementInspector = loi.getListElementObjectInspector();
    List<?> elements = loi.getList(o);
    if (elements == null) {
        sb.append(nullStr);
        return;
    }
    sb.append(LBRACKET);
    for (int i = 0; i < elements.size(); i++) {
        if (i > 0) {
            sb.append(COMMA);
        }
        buildJSONString(sb, elements.get(i), elementInspector, JSON_NULL);
    }
    sb.append(RBRACKET);
}

/** Renders a map as comma-separated key COLON value pairs between LBRACE and RBRACE. */
private static void appendMapJson(StringBuilder sb, Object o, MapObjectInspector moi,
        String nullStr) {
    ObjectInspector keyInspector = moi.getMapKeyObjectInspector();
    ObjectInspector valueInspector = moi.getMapValueObjectInspector();
    Map<?, ?> entries = moi.getMap(o);
    if (entries == null) {
        sb.append(nullStr);
        return;
    }
    sb.append(LBRACE);
    boolean needSeparator = false;
    for (Map.Entry<?, ?> entry : entries.entrySet()) {
        if (needSeparator) {
            sb.append(COMMA);
        }
        needSeparator = true;
        buildJSONString(sb, entry.getKey(), keyInspector, JSON_NULL);
        sb.append(COLON);
        buildJSONString(sb, entry.getValue(), valueInspector, JSON_NULL);
    }
    sb.append(RBRACE);
}

/** Renders a struct as QUOTE-wrapped field names with their values between LBRACE and RBRACE. */
private static void appendStructJson(StringBuilder sb, Object o, StructObjectInspector soi,
        String nullStr) {
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    if (o == null) {
        sb.append(nullStr);
        return;
    }
    sb.append(LBRACE);
    for (int i = 0; i < fields.size(); i++) {
        if (i > 0) {
            sb.append(COMMA);
        }
        sb.append(QUOTE);
        sb.append(fields.get(i).getFieldName());
        sb.append(QUOTE);
        sb.append(COLON);
        buildJSONString(sb, soi.getStructFieldData(o, fields.get(i)),
                fields.get(i).getFieldObjectInspector(), JSON_NULL);
    }
    sb.append(RBRACE);
}

/** Renders a union as its tag, COLON, and the tagged field between LBRACE and RBRACE. */
private static void appendUnionJson(StringBuilder sb, Object o, UnionObjectInspector uoi,
        String nullStr) {
    if (o == null) {
        sb.append(nullStr);
        return;
    }
    sb.append(LBRACE);
    sb.append(uoi.getTag(o));
    sb.append(COLON);
    buildJSONString(sb, uoi.getField(o), uoi.getObjectInspectors().get(uoi.getTag(o)), JSON_NULL);
    sb.append(RBRACE);
}
From source file:com.facebook.hive.orc.DynamicByteArray.java
License:Open Source License
/** * Set a text value from the bytes in this dynamic array. * @param result the value to set/*from w w w. j a va 2 s.c o m*/ * @param offset the start of the bytes to copy * @param length the number of bytes to copy */ public void setText(Text result, int offset, int length) { result.clear(); result.set(data.getBytes(), offset, length); }
From source file:com.facebook.presto.accumulo.AccumuloClient.java
License:Apache License
/** * Gets the TabletServer hostname for where the given key is located in the given table * * @param table Fully-qualified table name * @param key Key to locate/*from w ww . ja v a2 s . c om*/ * @return The tablet location, or DUMMY_LOCATION if an error occurs */ private Optional<String> getTabletLocation(String table, Key key) { try { // Get the Accumulo table ID so we can scan some fun stuff String tableId = connector.tableOperations().tableIdMap().get(table); // Create our scanner against the metadata table, fetching 'loc' family Scanner scanner = connector.createScanner("accumulo.metadata", auths); scanner.fetchColumnFamily(new Text("loc")); // Set the scan range to just this table, from the table ID to the default tablet // row, which is the last listed tablet Key defaultTabletRow = new Key(tableId + '<'); Key start = new Key(tableId); Key end = defaultTabletRow.followingKey(PartialKey.ROW); scanner.setRange(new Range(start, end)); Optional<String> location = Optional.empty(); if (key == null) { // if the key is null, then it is -inf, so get first tablet location Iterator<Entry<Key, Value>> iter = scanner.iterator(); if (iter.hasNext()) { location = Optional.of(iter.next().getValue().toString()); } } else { // Else, we will need to scan through the tablet location data and find the location // Create some text objects to do comparison for what we are looking for Text splitCompareKey = new Text(); key.getRow(splitCompareKey); Text scannedCompareKey = new Text(); // Scan the table! 
for (Entry<Key, Value> entry : scanner) { // Get the bytes of the key byte[] keyBytes = entry.getKey().getRow().copyBytes(); // If the last byte is <, then we have hit the default tablet, so use this location if (keyBytes[keyBytes.length - 1] == '<') { location = Optional.of(entry.getValue().toString()); break; } else { // Chop off some magic nonsense scannedCompareKey.set(keyBytes, 3, keyBytes.length - 3); // Compare the keys, moving along the tablets until the location is found if (scannedCompareKey.getLength() > 0) { int compareTo = splitCompareKey.compareTo(scannedCompareKey); if (compareTo <= 0) { location = Optional.of(entry.getValue().toString()); } else { // all future tablets will be greater than this key break; } } } } scanner.close(); } // If we were unable to find the location for some reason, return the default tablet // location return location.isPresent() ? location : getDefaultTabletLocation(table); } catch (Exception e) { // Swallow this exception so the query does not fail due to being unable // to locate the tablet server for the provided Key. // This is purely an optimization, but we will want to log the error. LOG.error("Failed to get tablet location, returning dummy location", e); return Optional.empty(); } }