Example usage for org.apache.hadoop.io Text set

List of usage examples for org.apache.hadoop.io Text set

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text set method.

Prototype

public void set(byte[] utf8, int start, int len) 

Source Link

Document

Sets the Text to a range of bytes.

Usage

From source file:TweetTweetTweet.java

License:Open Source License

/**
 * Populates the three tweets from one Text, one after the other.
 * After the first and second tweet have been read, the leading byte of
 * {@code text} (the separating tab) is dropped before the next tweet is read.
 *
 * @param text the Text to read from; it is modified in place
 */
@Override
public void fromText(Text text) {
    tweet1.fromText(text);
    dropFirstByte(text);
    tweet2.fromText(text);
    dropFirstByte(text);
    tweet3.fromText(text);
}

/** Re-sets {@code text} to its own bytes minus the first one (skips the tab). */
private void dropFirstByte(Text text) {
    text.set(text.getBytes(), 1, text.getLength() - 1);
}

From source file:TestString.java

License:Apache License

/**
 * Times 100,000,000 rounds of carving two sub-ranges out of a Text with
 * {@code Text.set(byte[], int, int)} and prints the two results plus the
 * elapsed wall-clock time in seconds.
 */
@Test
public void testTextSubstring() throws Exception {
    final Text source = new Text("string");
    final Text head = new Text();
    final Text tail = new Text();

    final long begin = System.nanoTime();
    int remaining = 100000000;
    while (remaining > 0) {
        // first two bytes
        head.set(source.getBytes(), 0, 2);
        // everything from byte 3 to the end
        tail.set(source.getBytes(), 3, source.getLength() - 3);
        remaining--;
    }
    final long finish = System.nanoTime();

    final float elapsedSeconds = (finish - begin) / 1000000000f;
    System.out.println("TestTextSubString");
    System.out.println("text1: " + head.toString());
    System.out.println("text2: " + tail.toString());
    System.out.println("Elapsed Time: " + elapsedSeconds + " seconds.");
}

From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java

License:Apache License

/**
 * Extracts a single column from a delimited line.
 * <p>
 * Columns are numbered from 1. The last column of a line does not need a
 * trailing delimiter: when no further delimiter is found, the column runs to
 * the end of {@code val}.
 *
 * @param val the delimited line; it is not modified
 * @param column 1-based index of the column to extract
 * @param delimiter the column separator; must be non-empty
 * @return a new Text holding the bytes of the requested column
 * @throws IOException if {@code delimiter} is null or empty, if {@code column}
 *         is less than 1, or if {@code val} has fewer than {@code column} columns
 */
public Text getColumn(Text val, int column, String delimiter) throws IOException {
    if (delimiter == null || delimiter.equals("")) {
        throw new IOException("Value of delimiter is empty");
    }
    if (column < 1) {
        throw new IOException("Column index must be 1 or greater, but was " + column);
    }

    // Text.find reports byte offsets, so the delimiter has to be measured in
    // encoded bytes rather than in UTF-16 chars.
    int delimiterLength = new Text(delimiter).getLength();

    int columnStart = 0;
    int columnEnd = val.find(delimiter, columnStart);
    for (int i = 1; i < column; i++) {
        if (columnEnd < 0) {
            // Ran out of delimiters before reaching the requested column.
            throw new IOException("Value has fewer than " + column + " columns");
        }
        columnStart = columnEnd + delimiterLength;
        columnEnd = val.find(delimiter, columnStart);
    }
    if (columnEnd < 0) {
        // No delimiter after this column: it extends to the end of the value.
        columnEnd = val.getLength();
    }

    logger.debug("text value is: " + val);
    Text keyColumn = new Text();
    keyColumn.set(val.getBytes(), columnStart, columnEnd - columnStart);
    return keyColumn;
}

From source file:com.asakusafw.runtime.value.StringOptionUtil.java

License:Apache License

/**
 * Trims the leading/trailing classical whitespace characters in the {@link StringOption}.
 * This only removes the following characters:
 * <ul>/*from  w w  w  .jav  a  2 s  .c om*/
 * <li> {@code "\t" (HT:U+0009)} </li>
 * <li> {@code "\n" (LF:U+000a)} </li>
 * <li> {@code "\r" (CR:U+000d)} </li>
 * <li> {@code " " (SP:U+0020)} </li>
 * </ul>
 * This directly modifies the target {@link StringOption}.
 * @param option the target {@link StringOption}
 * @throws NullPointerException if the {@link StringOption} is/represents {@code null}
 */
public static void trim(StringOption option) {
    Text text = option.get();
    byte[] bytes = text.getBytes();
    int length = text.getLength();
    int start = 0;
    int last = length - 1;
    for (; start <= last; start++) {
        if (isTrimTarget(bytes[start]) == false) {
            break;
        }
    }
    for (; last >= start; last--) {
        if (isTrimTarget(bytes[last]) == false) {
            break;
        }
    }
    if (start == 0 && last == length - 1) {
        return;
    }
    text.set(bytes, start, last + 1 - start);
}

From source file:com.datasalt.utils.mapred.joiner.MultiJoinReducer.java

License:Apache License

/**
 * Copies the multi-join group bytes of {@code pair} into {@code obj}.
 *
 * @param pair the pair whose group supplies the bytes and their length
 * @param obj the Text to overwrite
 * @return the same {@code obj} instance, now holding the group bytes
 */
public Text deserializeKey(MultiJoinPair pair, Text obj) throws IOException {
    final byte[] groupBytes = pair.getMultiJoinGroup().getBytes();
    final int groupLength = pair.getMultiJoinGroup().getLength();
    obj.set(groupBytes, 0, groupLength);
    return obj;
}

From source file:com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java

License:Apache License

/**
 * Reads a zero-terminated, escaped byte string from {@code buffer} into {@code r}.
 * <p>
 * A first pass counts the decoded length up to the terminating 0 byte. If no
 * escape byte (1) was seen, the raw bytes are copied straight into {@code r}.
 * Otherwise the buffer is rewound to the start and the bytes are re-read and
 * unescaped one by one into {@code r}'s backing array.
 *
 * @param buffer the input, read starting at its current position
 * @param invert passed through unchanged to every {@code buffer.read} call
 * @param r the Text to fill; it is modified in place
 * @return {@code r}
 * @throws IOException propagated from the buffer operations
 */
static Text deserializeText(InputByteBuffer buffer, boolean invert, Text r) throws IOException {
    // Get the actual length first
    int start = buffer.tell();
    int length = 0;
    do {
        byte b = buffer.read(invert);
        if (b == 0) {
            // end of string
            break;
        }
        if (b == 1) {
            // the last char is an escape char. read the actual char
            buffer.read(invert);
        }
        // an escape pair counts as a single decoded byte
        length++;
    } while (true);

    // buffer.tell() - start is the number of raw bytes consumed; it equals the
    // decoded length only in the branch the original author labels "no escaping".
    if (length == buffer.tell() - start) {
        // No escaping happened, so we are already done.
        r.set(buffer.getData(), start, length);
    } else {
        // Escaping happened, we need to copy byte-by-byte.
        // 1. Set the length first.
        //    (The copied content is overwritten below; this call sizes r.)
        r.set(buffer.getData(), start, length);
        // 2. Reset the pointer.
        buffer.seek(start);
        // 3. Copy the data.
        byte[] rdata = r.getBytes();
        for (int i = 0; i < length; i++) {
            byte b = buffer.read(invert);
            if (b == 1) {
                // The last char is an escape char, read the actual char.
                // The serialization format escape \0 to \1, and \1 to \2,
                // to make sure the string is null-terminated.
                b = (byte) (buffer.read(invert) - 1);
            }
            rdata[i] = b;
        }
        // 4. Read the null terminator.
        byte b = buffer.read(invert);
        assert (b == 0);
    }
    return r;
}

From source file:com.ebay.nest.io.sede.lazybinary.LazyBinaryHiveVarchar.java

License:Apache License

/**
 * Loads the given byte range into the varchar's existing Text member and then
 * applies the maximum-length limit.
 *
 * @param bytes holder of the backing byte array
 * @param start offset of the first byte to use
 * @param length number of bytes to use
 */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
    // write straight into the Text owned by the varchar writable (no new Text)
    data.getTextValue().set(bytes.getData(), start, length);
    data.enforceMaxLength(maxLength);
}

From source file:com.ebay.nest.io.sede.SerDeUtils.java

License:Apache License

/**
 * Appends a JSON-style rendering of {@code o} to {@code sb}, using {@code oi}
 * to decide how the object is interpreted.
 * <p>
 * Primitives are appended directly (strings, varchars, dates and timestamps
 * wrapped in double quotes); lists, maps, structs and unions recurse into their
 * elements. Nested values are always rendered with {@code JSON_NULL} as their
 * null text; {@code nullStr} applies only to the object passed to this call.
 *
 * @param sb the builder to append to
 * @param o the object to render; may be null
 * @param oi the inspector describing {@code o}
 * @param nullStr the text appended when {@code o} (or its list/map view) is null
 * @throws RuntimeException if the category or primitive category is not handled
 */
static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {

    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        if (o == null) {
            sb.append(nullStr);
        } else {
            switch (poi.getPrimitiveCategory()) {
            case BOOLEAN: {
                boolean b = ((BooleanObjectInspector) poi).get(o);
                sb.append(b ? "true" : "false");
                break;
            }
            case BYTE: {
                sb.append(((ByteObjectInspector) poi).get(o));
                break;
            }
            case SHORT: {
                sb.append(((ShortObjectInspector) poi).get(o));
                break;
            }
            case INT: {
                sb.append(((IntObjectInspector) poi).get(o));
                break;
            }
            case LONG: {
                sb.append(((LongObjectInspector) poi).get(o));
                break;
            }
            case FLOAT: {
                sb.append(((FloatObjectInspector) poi).get(o));
                break;
            }
            case DOUBLE: {
                sb.append(((DoubleObjectInspector) poi).get(o));
                break;
            }
            case STRING: {
                // quoted, with the content passed through escapeString
                sb.append('"');
                sb.append(escapeString(((StringObjectInspector) poi).getPrimitiveJavaObject(o)));
                sb.append('"');
                break;
            }
            case VARCHAR: {
                // same treatment as STRING, via the varchar's toString()
                sb.append('"');
                sb.append(
                        escapeString(((HiveVarcharObjectInspector) poi).getPrimitiveJavaObject(o).toString()));
                sb.append('"');
                break;
            }
            case DATE: {
                // quoted but not escaped
                sb.append('"');
                sb.append(((DateObjectInspector) poi).getPrimitiveWritableObject(o));
                sb.append('"');
                break;
            }
            case TIMESTAMP: {
                // quoted but not escaped
                sb.append('"');
                sb.append(((TimestampObjectInspector) poi).getPrimitiveWritableObject(o));
                sb.append('"');
                break;
            }
            case BINARY: {
                // The valid bytes are decoded through a Text and appended as-is:
                // no surrounding quotes and no escaping.
                BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
                Text txt = new Text();
                txt.set(bw.getBytes(), 0, bw.getLength());
                sb.append(txt.toString());
                break;
            }
            case DECIMAL: {
                sb.append(((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o));
                break;
            }
            default:
                throw new RuntimeException("Unknown primitive type: " + poi.getPrimitiveCategory());
            }
        }
        break;
    }
    case LIST: {
        // Rendered as LBRACKET, the elements separated by COMMA, RBRACKET.
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
        List<?> olist = loi.getList(o);
        if (olist == null) {
            sb.append(nullStr);
        } else {
            sb.append(LBRACKET);
            for (int i = 0; i < olist.size(); i++) {
                if (i > 0) {
                    sb.append(COMMA);
                }
                buildJSONString(sb, olist.get(i), listElementObjectInspector, JSON_NULL);
            }
            sb.append(RBRACKET);
        }
        break;
    }
    case MAP: {
        // Rendered as LBRACE, key COLON value pairs separated by COMMA, RBRACE.
        // Keys go through the same recursive rendering as values.
        MapObjectInspector moi = (MapObjectInspector) oi;
        ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
        ObjectInspector mapValueObjectInspector = moi.getMapValueObjectInspector();
        Map<?, ?> omap = moi.getMap(o);
        if (omap == null) {
            sb.append(nullStr);
        } else {
            sb.append(LBRACE);
            boolean first = true;
            for (Object entry : omap.entrySet()) {
                if (first) {
                    first = false;
                } else {
                    sb.append(COMMA);
                }
                Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
                buildJSONString(sb, e.getKey(), mapKeyObjectInspector, JSON_NULL);
                sb.append(COLON);
                buildJSONString(sb, e.getValue(), mapValueObjectInspector, JSON_NULL);
            }
            sb.append(RBRACE);
        }
        break;
    }
    case STRUCT: {
        // Rendered like a map whose keys are the field names wrapped in QUOTE.
        // Field names are appended without escaping.
        StructObjectInspector soi = (StructObjectInspector) oi;
        List<? extends StructField> structFields = soi.getAllStructFieldRefs();
        if (o == null) {
            sb.append(nullStr);
        } else {
            sb.append(LBRACE);
            for (int i = 0; i < structFields.size(); i++) {
                if (i > 0) {
                    sb.append(COMMA);
                }
                sb.append(QUOTE);
                sb.append(structFields.get(i).getFieldName());
                sb.append(QUOTE);
                sb.append(COLON);
                buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)),
                        structFields.get(i).getFieldObjectInspector(), JSON_NULL);
            }
            sb.append(RBRACE);
        }
        break;
    }
    case UNION: {
        // Rendered as a single-entry object: the tag (unquoted), COLON, then the
        // field rendered with the inspector selected by that tag.
        UnionObjectInspector uoi = (UnionObjectInspector) oi;
        if (o == null) {
            sb.append(nullStr);
        } else {
            sb.append(LBRACE);
            sb.append(uoi.getTag(o));
            sb.append(COLON);
            buildJSONString(sb, uoi.getField(o), uoi.getObjectInspectors().get(uoi.getTag(o)), JSON_NULL);
            sb.append(RBRACE);
        }
        break;
    }
    default:
        throw new RuntimeException("Unknown type in ObjectInspector!");
    }
}

From source file:com.facebook.hive.orc.DynamicByteArray.java

License:Open Source License

/**
 * Set a text value from the bytes in this dynamic array.
 * @param result the value to set/*from w  w w. j a va  2  s.c  o m*/
 * @param offset the start of the bytes to copy
 * @param length the number of bytes to copy
 */
public void setText(Text result, int offset, int length) {
    result.clear();
    result.set(data.getBytes(), offset, length);
}

From source file:com.facebook.presto.accumulo.AccumuloClient.java

License:Apache License

/**
 * Gets the TabletServer hostname for where the given key is located in the given table
 *
 * @param table Fully-qualified table name
 * @param key Key to locate/*from  w ww .  ja  v a2 s  .  c  om*/
 * @return The tablet location, or DUMMY_LOCATION if an error occurs
 */
private Optional<String> getTabletLocation(String table, Key key) {
    try {
        // Get the Accumulo table ID so we can scan some fun stuff
        String tableId = connector.tableOperations().tableIdMap().get(table);

        // Create our scanner against the metadata table, fetching 'loc' family
        Scanner scanner = connector.createScanner("accumulo.metadata", auths);
        scanner.fetchColumnFamily(new Text("loc"));

        // Set the scan range to just this table, from the table ID to the default tablet
        // row, which is the last listed tablet
        Key defaultTabletRow = new Key(tableId + '<');
        Key start = new Key(tableId);
        Key end = defaultTabletRow.followingKey(PartialKey.ROW);
        scanner.setRange(new Range(start, end));

        Optional<String> location = Optional.empty();
        if (key == null) {
            // if the key is null, then it is -inf, so get first tablet location
            Iterator<Entry<Key, Value>> iter = scanner.iterator();
            if (iter.hasNext()) {
                location = Optional.of(iter.next().getValue().toString());
            }
        } else {
            // Else, we will need to scan through the tablet location data and find the location

            // Create some text objects to do comparison for what we are looking for
            Text splitCompareKey = new Text();
            key.getRow(splitCompareKey);
            Text scannedCompareKey = new Text();

            // Scan the table!
            for (Entry<Key, Value> entry : scanner) {
                // Get the bytes of the key
                byte[] keyBytes = entry.getKey().getRow().copyBytes();

                // If the last byte is <, then we have hit the default tablet, so use this location
                if (keyBytes[keyBytes.length - 1] == '<') {
                    location = Optional.of(entry.getValue().toString());
                    break;
                } else {
                    // Chop off some magic nonsense
                    scannedCompareKey.set(keyBytes, 3, keyBytes.length - 3);

                    // Compare the keys, moving along the tablets until the location is found
                    if (scannedCompareKey.getLength() > 0) {
                        int compareTo = splitCompareKey.compareTo(scannedCompareKey);
                        if (compareTo <= 0) {
                            location = Optional.of(entry.getValue().toString());
                        } else {
                            // all future tablets will be greater than this key
                            break;
                        }
                    }
                }
            }
            scanner.close();
        }

        // If we were unable to find the location for some reason, return the default tablet
        // location
        return location.isPresent() ? location : getDefaultTabletLocation(table);
    } catch (Exception e) {
        // Swallow this exception so the query does not fail due to being unable
        // to locate the tablet server for the provided Key.
        // This is purely an optimization, but we will want to log the error.
        LOG.error("Failed to get tablet location, returning dummy location", e);
        return Optional.empty();
    }
}