Example usage for org.apache.hadoop.io Text copyBytes

Introduction

On this page you can find example usages of the copyBytes method of org.apache.hadoop.io.Text.

Prototype

public byte[] copyBytes()

Source Link

Document

Get a copy of the bytes that is exactly the length of the data.

Usage

From source file:mapred.io.CustomRecordReader.java

License:Apache License

/**
 * Reads the first line of the stream into a scratch {@code Text} and discounts a
 * leading UTF-8 byte order mark (0xEF, 0xBB, 0xBF) when one is present.
 *
 * <p>{@code pos} is advanced by the full size reported by {@code in.readLine},
 * BOM included; only the returned size is reduced.
 *
 * @return the size reported by {@code in.readLine}, minus 3 if a BOM was found
 * @throws IOException if {@code in.readLine} or {@code maxBytesToConsume} throws it
 */
private int skipUtfByteOrderMark() throws IOException {
    // Text only supports UTF-8, so the UTF-8 BOM at the start of the text stream
    // is the only byte order mark that has to be recognised.
    final Text firstLine = new Text();

    // Allow three extra bytes for the BOM; the sum is computed as a long and
    // clamped so it cannot overflow int.
    final long widenedLimit = 3L + (long) maxLineLength;
    final int lineLimit = (int) Math.min(widenedLimit, Integer.MAX_VALUE);
    final int bytesRead = in.readLine(firstLine, lineLimit, maxBytesToConsume(pos));

    // Original author's note: reading three extra bytes for the first line is not
    // meant to change existing behaviour, because the number of bytes copied into
    // the Text never exceeds the size returned by readLine, and a line whose
    // returned size is not below maxLineLength gets discarded in favour of the
    // next one (that handling is outside this method).
    pos += bytesRead;

    final int lineLength = firstLine.getLength();
    final byte[] lineBytes = firstLine.getBytes();
    final boolean startsWithBom = lineLength >= 3
            && lineBytes[0] == (byte) 0xEF
            && lineBytes[1] == (byte) 0xBB
            && lineBytes[2] == (byte) 0xBF;
    if (!startsWithBom) {
        return bytesRead;
    }

    LOG.info("Found UTF-8 BOM and skipped it");
    final int payloadLength = lineLength - 3;
    if (payloadLength <= 0) {
        // The line consisted of the BOM only.
        firstLine.clear();
    } else {
        // Take a copy first and re-set the Text from it, skipping the three BOM
        // bytes. (The original author notes that reusing the same buffer might
        // work as well.)
        final byte[] snapshot = firstLine.copyBytes();
        firstLine.set(snapshot, 3, payloadLength);
    }
    return bytesRead - 3;
}

From source file:mr.MyFileRecordReader2.java

License:Apache License

/**
 * Reads the first line of the stream into {@code value} and strips a leading
 * UTF-8 byte order mark (0xEF, 0xBB, 0xBF) from it when one is present.
 *
 * <p>{@code pos} is advanced by the full size reported by {@code in.readLine},
 * BOM included; only the returned size and the contents of {@code value} are
 * adjusted.
 *
 * @param value receives the first line; left without the BOM bytes on return
 * @return the size reported by {@code in.readLine}, minus 3 if a BOM was found
 * @throws IOException if {@code in.readLine} or {@code maxBytesToConsume} throws it
 */
private int skipUtfByteOrderMark(Text value) throws IOException {
    // Text only supports UTF-8, hence only the UTF-8 BOM at the very start of
    // the text stream needs checking.
    final int bomLength = 3;

    // Widen the line limit by the BOM length, computed as a long and clamped
    // to the int range.
    final int widenedMaxLineLength =
            (int) Math.min(3L + (long) maxLineLength, Integer.MAX_VALUE);
    final int consumed = in.readLine(value, widenedMaxLineLength, maxBytesToConsume(pos));

    // Original author's note: the three extra bytes read for the first line are
    // not meant to alter existing behaviour. The bytes copied into the Text never
    // exceed the size readLine returns, and when that size is not less than
    // maxLineLength the line is discarded and the next one read (that handling
    // is not part of this method).
    pos += consumed;

    final int length = value.getLength();
    final byte[] raw = value.getBytes();
    if (length < bomLength
            || raw[0] != (byte) 0xEF
            || raw[1] != (byte) 0xBB
            || raw[2] != (byte) 0xBF) {
        // No BOM at the start of the line: nothing to strip.
        return consumed;
    }

    LOG.info("Found UTF-8 BOM and skipped it");
    final int remaining = length - bomLength;
    if (remaining > 0) {
        // Re-set the Text from a copy of its bytes, starting after the BOM.
        // (The original author notes that the same buffer might do as well.)
        final byte[] copied = value.copyBytes();
        value.set(copied, bomLength, remaining);
    } else {
        // Nothing but the BOM was on this line.
        value.clear();
    }
    return consumed - bomLength;
}

From source file:mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexerTest.java

License:Apache License

/**
 * Delegates to the {@code byte[]} overload of {@code toHumanString}, passing a
 * copy of the bytes of {@code text}, or {@code null} when {@code text} is null.
 *
 * @param text the text to render; may be {@code null}
 * @return whatever the {@code byte[]} overload returns for that input
 */
static String toHumanString(Text text) {
    // Typed local keeps overload resolution on the byte[] variant even for null.
    byte[] rawBytes = null;
    if (text != null) {
        rawBytes = text.copyBytes();
    }
    return toHumanString(rawBytes);
}

From source file:mvm.rya.indexing.KeyParts.java

License:Apache License

/**
 * Renders a {@code Text} by handing a copy of its bytes to the {@code byte[]}
 * overload of {@code toHumanString}; a null {@code text} is forwarded as a
 * null array.
 *
 * @param text the text to render; may be {@code null}
 * @return the result of the {@code byte[]} overload
 */
public static String toHumanString(Text text) {
    byte[] content;
    if (text == null) {
        content = null;
    } else {
        content = text.copyBytes();
    }
    return toHumanString(content);
}

From source file:org.apache.accumulo.core.data.LoadPlan.java

License:Apache License

/**
 * Returns a copy of the bytes held by {@code data}.
 *
 * @param data the source text; may be {@code null}
 * @return the result of {@code data.copyBytes()}, or {@code null} if {@code data} is null
 */
private static byte[] copy(Text data) {
    if (data == null) {
        return null;
    }
    return data.copyBytes();
}

From source file:org.apache.rya.indexing.KeyParts.java

License:Apache License

/**
 * Passes a copy of the bytes of {@code text} to the {@code byte[]} overload of
 * {@code toHumanString}; when {@code text} is null a null array is passed
 * instead.
 *
 * @param text the text to render; may be {@code null}
 * @return the value produced by the {@code byte[]} overload
 */
public static String toHumanString(final Text text) {
    final byte[] copied = (text != null) ? text.copyBytes() : null;
    return toHumanString(copied);
}

From source file:org.utils.UnsplittableFileReader.java

License:Apache License

/**
 * Advances the wrapped {@code reader} and, when it yields a record, refreshes
 * this reader's {@code key} and {@code value} from it: the key gets
 * {@code filename} and the wrapped reader's current key as its line, the value
 * gets a copy of the bytes of the wrapped reader's current value.
 *
 * @return {@code true} if the wrapped reader produced a record, {@code false} otherwise
 * @throws IOException if the wrapped reader throws it
 */
@Override
public synchronized boolean nextKeyValue() throws IOException {
    if (!reader.nextKeyValue()) {
        // Wrapped reader has nothing more; key and value are left untouched.
        return false;
    }

    final LongWritable currentLineNumber = reader.getCurrentKey();
    final Text currentLine = reader.getCurrentValue();

    // Rebuild the key from scratch for this record.
    key.clear();
    key.setFilename(filename);
    key.setLine(currentLineNumber.get());

    // Store a copy of the line's bytes rather than the reader's own Text.
    value.clear();
    value.set(currentLine.copyBytes());

    LOG.debug("read " + key);
    return true;
}