Example usage for org.apache.hadoop.io Text getBytes

Introduction

This page lists example usages of org.apache.hadoop.io.Text#getBytes(), collected from open-source projects.

Prototype

@Override
public byte[] getBytes() 

Document

Returns the raw bytes; however, only data up to #getLength() is valid.
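
Because Text reuses its backing buffer, the array returned by getBytes() is often longer than the encoded string; this pitfall recurs throughout the examples below. A minimal sketch of the unsafe and safe decode patterns (class name and values are illustrative):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextGetBytesDemo {
    public static void main(String[] args) {
        Text text = new Text("hello world");
        // set(byte[], int, int) reuses the existing backing array when it is large enough
        byte[] hi = "hi".getBytes(StandardCharsets.UTF_8);
        text.set(hi, 0, hi.length);

        byte[] raw = text.getBytes();
        // Unsafe: decodes the entire backing array, which may include stale trailing bytes
        String unsafe = new String(raw, StandardCharsets.UTF_8);
        // Safe: decode only the first getLength() bytes
        String safe = new String(raw, 0, text.getLength(), StandardCharsets.UTF_8);

        System.out.println(safe);                // "hi"
        System.out.println(unsafe.equals(safe)); // often false: raw.length can exceed getLength()
    }
}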

Usage

From source file: org.elasticsearch.hadoop.mr.WritableValueWriter.java

License: Apache License
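
This elasticsearch-hadoop writer serializes Writable values to JSON. For Text it hands the raw buffer straight to writeUTF8String, bounded by getLength(), so no intermediate String or array copy is needed.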

@Override
@SuppressWarnings({ "unchecked", "deprecation" })
public Result write(Writable writable, Generator generator) {
    if (writable == null || writable instanceof NullWritable) {
        generator.writeNull();
    } else if (writable instanceof Text) {
        Text text = (Text) writable;
        generator.writeUTF8String(text.getBytes(), 0, text.getLength());
    } else if (writable instanceof UTF8) {
        UTF8 utf8 = (UTF8) writable;
        generator.writeUTF8String(utf8.getBytes(), 0, utf8.getLength());
    } else if (WritableCompatUtil.isShortWritable(writable)) {
        generator.writeNumber(WritableCompatUtil.unwrap(writable));
    } else if (writable instanceof IntWritable) {
        generator.writeNumber(((IntWritable) writable).get());
    } else if (writable instanceof LongWritable) {
        generator.writeNumber(((LongWritable) writable).get());
    } else if (writable instanceof VLongWritable) {
        generator.writeNumber(((VLongWritable) writable).get());
    } else if (writable instanceof VIntWritable) {
        generator.writeNumber(((VIntWritable) writable).get());
    } else if (writable instanceof ByteWritable) {
        generator.writeNumber(((ByteWritable) writable).get());
    } else if (writable instanceof DoubleWritable) {
        generator.writeNumber(((DoubleWritable) writable).get());
    } else if (writable instanceof FloatWritable) {
        generator.writeNumber(((FloatWritable) writable).get());
    } else if (writable instanceof BooleanWritable) {
        generator.writeBoolean(((BooleanWritable) writable).get());
    } else if (writable instanceof BytesWritable) {
        BytesWritable bw = (BytesWritable) writable;
        generator.writeBinary(bw.getBytes(), 0, bw.getLength());
    } else if (writable instanceof MD5Hash) {
        generator.writeString(writable.toString());
    }

    else if (writable instanceof ArrayWritable) {
        generator.writeBeginArray();
        for (Writable wrt : ((ArrayWritable) writable).get()) {
            Result result = write(wrt, generator);
            if (!result.isSuccesful()) {
                return result;
            }
        }
        generator.writeEndArray();
    }

    else if (writable instanceof AbstractMapWritable) {
        Map<Writable, Writable> map = (Map<Writable, Writable>) writable;

        generator.writeBeginObject();
        // ignore handling sets (which are just maps with null values)
        for (Entry<Writable, Writable> entry : map.entrySet()) {
            String fieldName = entry.getKey().toString();
            if (shouldKeep(generator.getParentPath(), fieldName)) {
                generator.writeFieldName(fieldName);
                Result result = write(entry.getValue(), generator);

                if (!result.isSuccesful()) {
                    return result;
                }
            }
        }
        generator.writeEndObject();
    } else {
        if (writeUnknownTypes) {
            return handleUnknown(writable, generator);
        }
        return Result.FAILED(writable);
    }
    return Result.SUCCESFUL();
}

From source file: org.exoplatform.LogLoader.java

License: Open Source License
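
This Pig LoadFunc implementation wraps the current Text value's raw buffer in a DataContext for parsing. Note that it passes getBytes() without the corresponding getLength(), so the parser may see stale trailing bytes when the buffer is reused.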

/**
 * {@inheritDoc}
 */
@Override
public Tuple getNext() throws IOException {
    try {
        if (!in.nextKeyValue()) {
            return null;
        }

        Text value = (Text) in.getCurrentValue();

        DataContext context = new DataContext(value.getBytes());
        Tuple tuple = parseData(context);

        return tuple;
    } catch (InterruptedException e) {
        throw new ExecException(e.getMessage(), e);
    }
}

From source file: org.gestore.hadoop.LongRecordReader.java

License: Apache License
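
This record reader collects the lines of one multi-line entry, delimited by start and stop patterns, appending each line's raw bytes up to its valid length.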

/**
 * Gets one complete entry.
 */

private int getEntry(Pattern matcherStart, Pattern matcherStop) throws IOException {
    boolean started = false;
    boolean done = false;

    // A single '\n' byte; note that ByteBuffer.putChar('\n') would emit two bytes (one UTF-16 char)
    byte[] newLineBytes = new byte[] { '\n' };

    Text tempLine = new Text();
    int totalRead = 0;
    int newRead = 0;
    // Discard lines before start record match, save first line that matches regex
    while (!started) {
        if (lastLine.getLength() <= 0) {
            newRead = in.readLine(tempLine, maxLineLength,
                    Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        } else {
            tempLine = lastLine;
            newRead = lastLine.getLength();
            lastLine = new Text();
        }
        if (newRead == 0) {
            return 0;
        }
        totalRead += newRead;
        Matcher m = matcherStart.matcher(tempLine.toString());
        if (m.matches()) {
            started = true;
            tempLine.append(newLineBytes, 0, newLineBytes.length);
            value.append(tempLine.getBytes(), 0, tempLine.getLength());
            break;
        }
    }

    // Save lines until end record match, save last line
    while (!done) {
        newRead = in.readLine(tempLine, maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        if (newRead == 0) {
            return totalRead;
        }
        totalRead += newRead;
        Matcher m = matcherStop.matcher(tempLine.toString());
        if (m.matches()) {
            done = true;
            lastLine = tempLine;
            return totalRead - newRead;
        }
        tempLine.append(newLineBytes, 0, newLineBytes.length);
        value.append(tempLine.getBytes(), 0, tempLine.getLength());
    }
    return totalRead;
}

From source file: org.hypertable.hadoop.mapred.HypertableRecordWriter.java

License: Open Source License
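
This writer joins key and value into one tab-delimited line, then computes row, column family, qualifier, optional timestamp, and value offsets directly against the line's backing byte array.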

/**
 * Write data to HT
 */
public void write(Text key, Text value) throws IOException {
    try {
        key.append(tab, 0, tab.length);

        m_line.clear();
        m_line.append(key.getBytes(), 0, key.getLength());
        m_line.append(value.getBytes(), 0, value.getLength());
        int len = m_line.getLength();

        int tab_count = 0;
        int tab_pos = 0;
        int found = 0;
        while (found != -1) {
            found = m_line.find(tab_str, found + 1);
            if (found > 0) {
                tab_count++;
                if (tab_count == 1)
                    tab_pos = found;
            }
        }

        boolean has_timestamp;
        if (tab_count >= 3) {
            has_timestamp = true;
        } else if (tab_count == 2) {
            has_timestamp = false;
        } else {
            throw new Exception("incorrect output line format: only " + tab_count + " tabs");
        }

        byte[] byte_array = m_line.getBytes();
        int row_offset, row_length;
        int family_offset = 0, family_length = 0;
        int qualifier_offset = 0, qualifier_length = 0;
        int value_offset = 0, value_length = 0;
        long timestamp = SerializedCellsFlag.AUTO_ASSIGN;

        int offset = 0;
        if (has_timestamp) {
            timestamp = Long.parseLong(m_line.decode(byte_array, 0, tab_pos));
            offset = tab_pos + 1;
        }

        row_offset = offset;
        tab_pos = m_line.find(tab_str, offset);
        row_length = tab_pos - row_offset;

        offset = tab_pos + 1;
        family_offset = offset;

        tab_pos = m_line.find(tab_str, offset);
        for (int i = family_offset; i < tab_pos; i++) {
            if (byte_array[i] == ':' && qualifier_offset == 0) {
                family_length = i - family_offset;
                qualifier_offset = i + 1;
            }
        }
        // no qualifier
        if (qualifier_offset == 0)
            family_length = tab_pos - family_offset;
        else
            qualifier_length = tab_pos - qualifier_offset;

        offset = tab_pos + 1;
        value_offset = offset;
        value_length = len - value_offset;

        if (!mCellsWriter.add(byte_array, row_offset, row_length, byte_array, family_offset, family_length,
                byte_array, qualifier_offset, qualifier_length, timestamp, byte_array, value_offset,
                value_length, SerializedCellsFlag.FLAG_INSERT)) {
            mClient.mutator_set_cells_serialized(mMutator, mCellsWriter.buffer(), false);
            mCellsWriter.clear();
            if ((row_length + family_length + qualifier_length + value_length + 32) > mCellsWriter.capacity())
                mCellsWriter = new SerializedCellsWriter(
                        row_length + family_length + qualifier_length + value_length + 32);
            if (!mCellsWriter.add(byte_array, row_offset, row_length, byte_array, family_offset, family_length,
                    byte_array, qualifier_offset, qualifier_length, timestamp, byte_array, value_offset,
                    value_length, SerializedCellsFlag.FLAG_INSERT))
                throw new IOException("Unable to add cell to SerializedCellsWriter " + "(row='"
                        + new String(byte_array, row_offset, row_length, "UTF-8") + "')");
        }
    } catch (Exception e) {
        log.error(e);
        throw new IOException("Unable to write cell - " + e.toString());
    }
}

From source file: org.loggo.search.cli.Search.java

License: Apache License
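
This Accumulo log-search command uses getBytes() in its sort comparator, comparing row keys byte-for-byte past the shard prefix without materializing them as strings.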

public void query() throws Exception {
    BatchScanner bs = conn.createBatchScanner(opts.table, Authorizations.EMPTY, 8);
    try {
        // Compute the user's date range, if any
        SimpleDateFormat sdf = new SimpleDateFormat(LogEntry.DATE_FORMAT);
        String startDate = "";
        if (opts.start != null) {
            startDate = sdf.format(new Date(opts.start));
        }
        String endDate = "9999";
        if (opts.end != null) {
            endDate = sdf.format(new Date(opts.end));
        }
        if (opts.start != null || opts.end != null) {
            // Set the date ranges for each shard
            List<Range> ranges = new ArrayList<>(Schema.SHARDS);
            for (int i = 0; i < Schema.SHARDS; i++) {
                Range r = new Range(String.format(ROW_FORMAT, i, startDate),
                        String.format(ROW_FORMAT, i, endDate));
                ranges.add(r);
            }
            bs.setRanges(ranges);
        } else {
            // full table scan
            bs.setRanges(Collections.singletonList(new Range()));
        }

        // Set the filter for applications and host
        int priority = 100;
        if (!opts.hosts.isEmpty() || !opts.applications.isEmpty()) {
            IteratorSetting is = new IteratorSetting(priority++, HostAndApplicationFilter.class);
            HostAndApplicationFilter.setApps(is, opts.applications);
            HostAndApplicationFilter.setHosts(is, opts.hosts);
            bs.addScanIterator(is);
        }
        // stack the iterators for multiple terms: each term must match to return results
        List<String> families = Arrays.asList(Schema.FAMILIES);
        if (!opts.terms.isEmpty()) {
            for (int i = 0; i < opts.terms.size(); i++) {
                String term = opts.terms.get(i);
                IteratorSetting is;
                if (opts.regexp) {
                    is = new IteratorSetting(priority++, RegExFilter.class);
                    RegExFilter.setRegexs(is, null, null, null, term, false);
                } else {
                    is = new IteratorSetting(priority++, "name" + i, GrepValueFilter.class);
                    GrepValueFilter.setTerm(is, term);
                    if (families.contains(term)) {
                        bs.fetchColumnFamily(new Text(term));
                    }
                }
                bs.addScanIterator(is);
            }
        }

        // Just get the count: don't bother returning whole records
        if (opts.count) {
            IteratorSetting is = new IteratorSetting(priority++, CountingIterator.class);
            bs.addScanIterator(is);
            long total = 0;
            for (Entry<Key, Value> entry : bs) {
                total += Long.parseLong(entry.getValue().toString());
            }
            printer.println(total);
            return;
        }

        // Get stats, not logs
        if (opts.duration != null) {
            final long duration = opts.duration;
            SimpleDateFormat fmt = new SimpleDateFormat(LogEntry.DATE_FORMAT);
            // Stats iterator pulls out counts by CF
            IteratorSetting is = new IteratorSetting(priority++, StatsIterator.class);
            StatsIterator.duration(is, opts.duration, TimeUnit.MILLISECONDS);
            bs.addScanIterator(is);
            // Group counts under the right "bucket" of time
            SortedMap<Long, Map<String, Long>> stats = new TreeMap<>();
            for (Entry<Key, Value> entry : bs) {
                Key key = entry.getKey();
                long ts = StatsIterator.getTs(key, fmt);
                // convert to start time for this bucket
                ts -= ts % duration;
                Map<String, Long> byCF = stats.get(ts);
                if (byCF == null) {
                    stats.put(ts, byCF = new TreeMap<>());
                }
                // Add values, by name given a string: "NAME:VALUE,NAME2:VALUE2"
                String value = entry.getValue().toString();
                if (!value.isEmpty()) {
                    String nameCounts[] = value.split(",");
                    for (String nameCount : nameCounts) {
                        String parts[] = nameCount.split(":");
                        Long current = byCF.get(parts[0]);
                        if (current == null) {
                            current = Long.decode(parts[1]);
                        } else {
                            current = Long.decode(parts[1]) + current.longValue();
                        }
                        byCF.put(parts[0], current);
                    }
                }
            }
            if (stats.isEmpty())
                return;
            // Use the range of the data, or a user specified range, if provided
            long start = stats.firstKey();
            long end = stats.lastKey();
            if (opts.start != null) {
                start = opts.start - (opts.start % duration);
            }
            if (opts.end != null) {
                end = opts.end - (opts.end % duration);
            }
            // Print a line for each bucket, even if there's no data
            for (long time = start; time <= end; time += duration) {
                Map<String, Long> byCF = stats.get(time);
                List<String> byCFList = new ArrayList<>();
                if (byCF != null) {
                    for (Entry<String, Long> entry : byCF.entrySet()) {
                        byCFList.add(String.format("%s: %d", entry.getKey(), entry.getValue()));
                    }
                }
                printer.println(
                        String.format("%s\t%s", fmt.format(new Date(time)), Joiner.on(", ").join(byCFList)));
            }
            return;
        }

        // Read the whole list for sorting. Unfortunately this means it has to fit into memory.
        ArrayList<Entry<Key, Value>> results = new ArrayList<Entry<Key, Value>>();
        for (Entry<Key, Value> entry : bs) {
            results.add(entry);
        }

        if (opts.sort || opts.reverse) {
            final int order = opts.reverse ? -1 : 1;
            Collections.sort(results, new Comparator<Entry<Key, Value>>() {
                @Override
                public int compare(Entry<Key, Value> o1, Entry<Key, Value> o2) {
                    Text row = o1.getKey().getRow();
                    Text row2 = o2.getKey().getRow();
                    return order * BytesWritable.Comparator.compareBytes(row.getBytes(), Schema.SHARD_LENGTH,
                            row.getLength() - Schema.SHARD_LENGTH, row2.getBytes(), Schema.SHARD_LENGTH,
                            row2.getLength() - Schema.SHARD_LENGTH);
                }
            });
        }
        for (Entry<Key, Value> entry : results) {
            String cq = entry.getKey().getColumnQualifier().toString();
            String parts[] = cq.split(Schema.APP_HOST_SEPARATOR);
            String row = entry.getKey().getRow().toString();
            String value = entry.getValue().toString();
            printer.println(String.format("%s\t%s\t%s\t%s", row.substring(Schema.SHARD_LENGTH), parts[0],
                    parts[1], value));
        }
    } finally {
        bs.close();
    }
}

From source file: org.mgrover.hive.translate.GenericUDFTranslate.java

License: Apache License
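
This Hive UDF wraps the raw bytes in a ByteBuffer bounded by getLength() so it can walk the from and to strings one code point at a time.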

/**
 * Pre-processes the from and to strings to populate {@link #replacementMap} and {@link #deletionSet}.
 *
 * @param from
 *          from string to be used for translation
 * @param to
 *          to string to be used for translation
 */
private void populateMappings(Text from, Text to) {
    replacementMap.clear();
    deletionSet.clear();

    ByteBuffer fromBytes = ByteBuffer.wrap(from.getBytes(), 0, from.getLength());
    ByteBuffer toBytes = ByteBuffer.wrap(to.getBytes(), 0, to.getLength());

    // Traverse through the from string, one code point at a time
    while (fromBytes.hasRemaining()) {
        // This will also move the iterator ahead by one code point
        int fromCodePoint = Text.bytesToCodePoint(fromBytes);
        // If the to string has more code points, make sure to traverse it too
        if (toBytes.hasRemaining()) {
            int toCodePoint = Text.bytesToCodePoint(toBytes);
            // If this code point in the from string already has a replacement or is marked
            // for deletion, move on to the next code point
            if (replacementMap.containsKey(fromCodePoint) || deletionSet.contains(fromCodePoint)) {
                continue;
            }
            replacementMap.put(fromCodePoint, toCodePoint);
        } else {
            // If this code point in the from string already has a replacement or is marked
            // for deletion, move on to the next code point
            if (replacementMap.containsKey(fromCodePoint) || deletionSet.contains(fromCodePoint)) {
                continue;
            }
            deletionSet.add(fromCodePoint);
        }
    }
}

From source file: org.mgrover.hive.translate.GenericUDFTranslate.java

License: Apache License
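
The translation pass likewise bounds the ByteBuffer with getLength() before decoding the input code point by code point.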

/**
 * Translates the input string based on {@link #replacementMap} and {@link #deletionSet} and
 * returns the translated string.
 * 
 * @param input
 *          input string to perform the translation on
 * @return translated string
 */
private String processInput(Text input) {
    StringBuilder resultBuilder = new StringBuilder();
    // Obtain the byte buffer from the input string so we can traverse it code point by code point
    ByteBuffer inputBytes = ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
    // Traverse the byte buffer containing the input string one code point at a time
    while (inputBytes.hasRemaining()) {
        int inputCodePoint = Text.bytesToCodePoint(inputBytes);
        // If the code point is in the deletion set, emit nothing for it and
        // continue on to the next code point
        if (deletionSet.contains(inputCodePoint)) {
            continue;
        }

        Integer replacementCodePoint = replacementMap.get(inputCodePoint);
        // If a replacement exists for this code point, append the replacement to the
        // output; otherwise append the original input code point
        char[] charArray = Character
                .toChars((replacementCodePoint != null) ? replacementCodePoint : inputCodePoint);
        resultBuilder.append(charArray);
    }
    return resultBuilder.toString();
}

From source file: org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License: Open Source License
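
This helper writes a length-prefixed copy of the Text's backing array to a stream. Note that it prefixes bytes.length rather than text.getLength(), so stale trailing bytes are written whenever the buffer is larger than the valid data.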

public void writeTextBytes(FSDataOutputStream fos, Text text) throws IOException {
    byte[] bytes = text.getBytes();
    int length = bytes.length;
    fos.writeInt(length);
    fos.write(bytes, 0, length);
}

From source file: org.mrgeo.data.accumulo.utils.AccumuloUtils.java

License: Apache License
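
This utility zero-pads the row bytes out to eight bytes, using getLength() to avoid reading past the valid data, and decodes the result as a long.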

/**
 * Converts a Text object containing a tileId back to a long.
 *
 * @param rowId Text object to convert.
 * @return the long value from the Text object.
 */
public static long toLong(Text rowId) {

    byte[] rowBytes = rowId.getBytes();
    byte[] outB = new byte[8];
    for (int x = 0; x < outB.length; x++) {
        if (x >= rowId.getLength()) {
            outB[x] = 0x0;
        } else {
            outB[x] = rowBytes[x];
        }
    }

    return ByteBuffer.wrap(outB).getLong();
}

From source file: org.mrgeo.vector.mrsvector.OSMTileIngester.java

License: Apache License
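
This helper returns the backing array directly when its length already equals getLength(), and otherwise copies just the valid prefix, which is the canonical way to extract exactly the valid bytes from a Text.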

static byte[] getBytes(final Text text) {
    byte[] bytes = text.getBytes();
    if (text.getLength() == bytes.length) {
        return bytes;
    }

    return Arrays.copyOf(bytes, text.getLength());
}