Example usage for org.apache.hadoop.io Text append

Introduction

On this page you can find example usages of the org.apache.hadoop.io Text append method.

Prototype

public void append(byte[] utf8, int start, int len)

Source Link

Document

Append a range of bytes to the end of the given text

Usage

From source file:com.asakusafw.runtime.io.TsvParser.java

License:Apache License

/**
 * Moves the bytes accumulated in {@code encodeBuffer} to the end of {@code text}
 * and then resets the buffer so it can be filled again.
 *
 * @param text the destination that receives the buffered bytes
 */
private void consumeEncoded(Text text) {
    // Switch the buffer from filling to draining.
    encodeBuffer.flip();
    if (encodeBuffer.hasRemaining()) {
        // Text.append takes (bytes, start, len). Pass the number of readable
        // bytes rather than the limit (which is an end index), and add the
        // backing array's offset so wrapped or sliced buffers are read from
        // the right place.
        text.append(encodeBuffer.array(),
                encodeBuffer.arrayOffset() + encodeBuffer.position(),
                encodeBuffer.remaining());
    }
    encodeBuffer.clear();
}

From source file:com.ashishpaliwal.hadoop.utils.inputformat.CsvLineReader.java

License:Apache License

/**
 * Read from the InputStream into the given Text.
 *
 * @param txt               the object to store the given line
 * @param maxLineLength     the maximum number of bytes to store into txt.
 * @param maxBytesToConsume the maximum number of bytes to consume in this
 *                          call./*w  w w.ja  v a2  s.  c  o  m*/
 * @return the number of bytes read including the newline
 * @throws IOException if the underlying stream throws
 */
public int readLine(Text txt, int maxLineLength, int maxBytesToConsume) throws IOException {
    txt.clear();
    boolean hadFinalNewline = false;
    boolean hadFinalReturn = false;
    boolean hitEndOfFile = false;
    int startPosn = bufferPosn;
    long bytesConsumed = 0;
    boolean inQuote = false;
    boolean isLastCharEscapeChar = false;

    outerLoop: while (true) {
        if (bufferPosn >= bufferLength) {
            if (!backfill()) {
                hitEndOfFile = true;
                break;
            }
        }

        startPosn = bufferPosn;

        for (; bufferPosn < bufferLength; ++bufferPosn) {

            switch (buffer[bufferPosn]) {

            case '\\':
                isLastCharEscapeChar = !isLastCharEscapeChar;
                break;

            case '"':
                if (!inQuote && hadFinalReturn) {
                    break outerLoop;
                }

                if (!isLastCharEscapeChar) {
                    inQuote = !inQuote;
                }
                isLastCharEscapeChar = false;
                break;

            case '\n':
                isLastCharEscapeChar = false;
                if (!inQuote) {
                    hadFinalNewline = true;
                    bufferPosn += 1;
                    break outerLoop;
                }
                break;

            case '\r':
                isLastCharEscapeChar = false;
                if (!inQuote) {
                    if (hadFinalReturn) {
                        // leave this \r in the stream, so we'll get it next time
                        break outerLoop;
                    }
                    hadFinalReturn = true;
                }
                break;

            default:
                isLastCharEscapeChar = false;
                if (!inQuote && hadFinalReturn) {
                    break outerLoop;
                }
            }
        }

        bytesConsumed += bufferPosn - startPosn;
        int length = bufferPosn - startPosn - (hadFinalReturn ? 1 : 0);
        length = Math.min(length, maxLineLength - txt.getLength());

        if (length >= 0)
            txt.append(buffer, startPosn, length);

        if (bytesConsumed >= maxBytesToConsume)
            return (int) Math.min(bytesConsumed, (long) Integer.MAX_VALUE);
    }

    int newlineLength = (hadFinalNewline ? 1 : 0) + (hadFinalReturn ? 1 : 0);

    if (!hitEndOfFile) {
        bytesConsumed += bufferPosn - startPosn;
        int length = bufferPosn - startPosn - newlineLength;
        length = Math.min(length, maxLineLength - txt.getLength());

        if (length > 0)
            txt.append(buffer, startPosn, length);
    }
    return (int) Math.min(bytesConsumed, (long) Integer.MAX_VALUE);
}

From source file:com.blm.orc.DynamicByteArray.java

License:Apache License

/**
 * Set a text value from the bytes in this dynamic array.
 * @param result the value to set/*w w  w .j  a  va 2s  .  c o m*/
 * @param offset the start of the bytes to copy
 * @param length the number of bytes to copy
 */
public void setText(Text result, int offset, int length) {
    result.clear();
    int currentChunk = offset / chunkSize;
    int currentOffset = offset % chunkSize;
    int currentLength = Math.min(length, chunkSize - currentOffset);
    while (length > 0) {
        result.append(data[currentChunk], currentOffset, currentLength);
        length -= currentLength;
        currentChunk += 1;
        currentOffset = 0;
        currentLength = Math.min(length, chunkSize - currentOffset);
    }
}

From source file:com.dinglicom.clouder.mapreduce.input.LineReader.java

License:Apache License

/**
 * Read a line terminated by one of CR, LF, or CRLF.
 *
 * @param str               cleared first, then filled with the bytes of the
 *                          line, excluding the terminator
 * @param maxLineLength     the maximum number of bytes stored into str; any
 *                          further bytes of the line are consumed but not
 *                          stored
 * @param maxBytesToConsume reading stops once at least this many bytes have
 *                          been consumed without finding a terminator; the
 *                          check runs once per buffer pass, so the limit may
 *                          be exceeded
 * @return the number of bytes consumed, including the terminator
 * @throws IOException if more than Integer.MAX_VALUE bytes were consumed, or
 *                     (presumably) if reading from {@code in} fails
 */
private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /* We're reading data from in, but the head of the stream may be
     * already buffered in buffer, so we have several cases:
     * 1. No newline characters are in the buffer, so we need to copy
     *    everything and read another buffer from the stream.
     * 2. An unambiguously terminated line is in buffer, so we just
     *    copy to str.
     * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
     *    in CR.  In this case we copy everything up to CR to str, but
     *    we also need to see what follows CR: if it's LF, then we
     *    need consume LF as well, so next call to readLine will read
     *    from after that.
     * We use a flag prevCharCR to signal if previous character was CR
     * and, if it happens to be at the end of the buffer, delay
     * consuming it until we have a chance to look at the char that
     * follows.
     */
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    int newlineLength = 0; // length of terminating newline
    boolean prevCharCR = false; // true if prev char was CR
    long bytesConsumed = 0;
    do {
        int startPosn = bufferPosn; // starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            // Buffer exhausted: refill it from the stream.
            startPosn = bufferPosn = 0;
            if (prevCharCR)
                ++bytesConsumed; // account for CR from previous read
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline
            if (buffer[bufferPosn] == LF) {
                newlineLength = (prevCharCR) ? 2 : 1;
                ++bufferPosn; // at next invocation proceed from following byte
                break;
            }
            if (prevCharCR) { // CR + notLF, we are at notLF
                // The notLF byte is left in the buffer for the next call.
                newlineLength = 1;
                break;
            }
            prevCharCR = (buffer[bufferPosn] == CR);
        }
        int readLength = bufferPosn - startPosn;
        if (prevCharCR && newlineLength == 0)
            --readLength; // CR at the end of the buffer
        bytesConsumed += readLength;
        // Can be negative when the terminator began in the previous buffer;
        // nothing is appended in that case.
        int appendLength = readLength - newlineLength;
        if (appendLength > maxLineLength - txtLength) {
            // Truncate to the space left under maxLineLength.
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before newline: " + bytesConsumed);
    return (int) bytesConsumed;
}

From source file:com.dinglicom.clouder.mapreduce.input.LineReader.java

License:Apache License

/**
 * Read a line terminated by a custom delimiter.
 *
 * <p>The delimiter is the byte sequence in {@code recordDelimiterBytes}.
 *
 * @param str               cleared first, then filled with the bytes read
 *                          before the delimiter
 * @param maxLineLength     the maximum number of bytes stored into str; any
 *                          further bytes are consumed but not stored
 * @param maxBytesToConsume reading stops once at least this many bytes have
 *                          been consumed without completing a delimiter; the
 *                          check runs once per buffer pass, so the limit may
 *                          be exceeded
 * @return the number of bytes consumed, including the delimiter
 * @throws IOException if more than Integer.MAX_VALUE bytes were consumed, or
 *                     (presumably) if reading from {@code in} fails
 */
private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    long bytesConsumed = 0;
    int delPosn = 0; // number of delimiter bytes matched so far
    do {
        int startPosn = bufferPosn; // starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            // Buffer exhausted: refill it from the stream.
            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) {
                delPosn++;
                if (delPosn >= recordDelimiterBytes.length) {
                    // Full delimiter matched; the next call starts after it.
                    bufferPosn++;
                    break;
                }
            } else {
                // NOTE(review): the current byte is not re-tested against the
                // first delimiter byte after a failed partial match, so input
                // such as "aab" with delimiter "ab" looks like it is missed --
                // confirm.
                delPosn = 0;
            }
        }
        int readLength = bufferPosn - startPosn;
        bytesConsumed += readLength;
        // Exclude the delimiter bytes matched so far from what gets stored.
        // NOTE(review): if the buffer ends inside a partial match that later
        // fails, the excluded bytes appear never to be appended -- confirm.
        int appendLength = readLength - delPosn;
        if (appendLength > maxLineLength - txtLength) {
            // Truncate to the space left under maxLineLength.
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume);
    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
    return (int) bytesConsumed;
}

From source file:com.ery.hadoop.mrddx.file.LineReaders.java

License:Apache License

/**
 * Read a line terminated by one of CR, LF, or CRLF.
 *
 * @param str               cleared first, then filled with the bytes of the
 *                          line, excluding the terminator
 * @param maxLineLength     the maximum number of bytes stored into str; any
 *                          further bytes of the line are consumed but not
 *                          stored
 * @param maxBytesToConsume reading stops once at least this many bytes have
 *                          been consumed without finding a terminator; the
 *                          check runs once per buffer pass, so the limit may
 *                          be exceeded
 * @return the number of bytes consumed, including the terminator
 * @throws IOException if more than Integer.MAX_VALUE bytes were consumed, or
 *                     (presumably) if reading from {@code in} fails
 */
private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /*
     * We're reading data from in, but the head of the stream may be already
     * buffered in buffer, so we have several cases:
     * 1. No newline characters are in the buffer, so we need to copy
     *    everything and read another buffer from the stream.
     * 2. An unambiguously terminated line is in buffer, so we just copy to
     *    str.
     * 3. Ambiguously terminated line is in buffer, i.e. buffer ends in CR.
     *    In this case we copy everything up to CR to str, but we also need
     *    to see what follows CR: if it's LF, then we need consume LF as
     *    well, so next call to readLine will read from after that.
     * We use a flag prevCharCR to signal if previous character was CR and,
     * if it happens to be at the end of the buffer, delay consuming it
     * until we have a chance to look at the char that follows.
     */
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    int newlineLength = 0; // length of terminating newline
    boolean prevCharCR = false; // true if prev char was CR
    long bytesConsumed = 0;
    do {
        // starting from where we left off the last time
        int startPosn = bufferPosn;
        if (bufferPosn >= bufferLength) {
            // Buffer exhausted: refill it from the stream.
            startPosn = bufferPosn = 0;
            if (prevCharCR)
                ++bytesConsumed; // account for CR from previous read
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        // search for newline
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            if (buffer[bufferPosn] == LF) {
                newlineLength = (prevCharCR) ? 2 : 1;
                ++bufferPosn; // at next invocation proceed from following byte
                break;
            }
            if (prevCharCR) { // CR + notLF, we are at notLF
                // The notLF byte is left in the buffer for the next call.
                newlineLength = 1;
                break;
            }
            prevCharCR = (buffer[bufferPosn] == CR);
        }
        int readLength = bufferPosn - startPosn;
        if (prevCharCR && newlineLength == 0)
            --readLength; // CR at the end of the buffer
        bytesConsumed += readLength;
        // Can be negative when the terminator began in the previous buffer;
        // nothing is appended in that case.
        int appendLength = readLength - newlineLength;
        if (appendLength > maxLineLength - txtLength) {
            // Truncate to the space left under maxLineLength.
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before newline: " + bytesConsumed);
    return (int) bytesConsumed;
}

From source file:com.ery.hadoop.mrddx.file.LineReaders.java

License:Apache License

/**
 * Read a line terminated by a custom delimiter.
 *
 * <p>The delimiter is the byte sequence in {@code recordDelimiterBytes}.
 *
 * @param str               cleared first, then filled with the bytes read
 *                          before the delimiter
 * @param maxLineLength     the maximum number of bytes stored into str; any
 *                          further bytes are consumed but not stored
 * @param maxBytesToConsume reading stops once at least this many bytes have
 *                          been consumed without completing a delimiter; the
 *                          check runs once per buffer pass, so the limit may
 *                          be exceeded
 * @return the number of bytes consumed, including the delimiter
 * @throws IOException if more than Integer.MAX_VALUE bytes were consumed, or
 *                     (presumably) if reading from {@code in} fails
 */
private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    long bytesConsumed = 0;
    int delPosn = 0; // number of delimiter bytes matched so far
    do {
        // starting from where we left off the last time
        int startPosn = bufferPosn;
        if (bufferPosn >= bufferLength) {
            // Buffer exhausted: refill it from the stream.
            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) {
                delPosn++;
                if (delPosn >= recordDelimiterBytes.length) {
                    // Full delimiter matched; the next call starts after it.
                    bufferPosn++;
                    break;
                }
            } else {
                // NOTE(review): the current byte is not re-tested against the
                // first delimiter byte after a failed partial match, so input
                // such as "aab" with delimiter "ab" looks like it is missed --
                // confirm.
                delPosn = 0;
            }
        }
        int readLength = bufferPosn - startPosn;
        bytesConsumed += readLength;
        // Exclude the delimiter bytes matched so far from what gets stored.
        // NOTE(review): if the buffer ends inside a partial match that later
        // fails, the excluded bytes appear never to be appended -- confirm.
        int appendLength = readLength - delPosn;
        if (appendLength > maxLineLength - txtLength) {
            // Truncate to the space left under maxLineLength.
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume);
    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
    return (int) bytesConsumed;
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFMapper.java

License:Apache License

/**
 * Appends the whole of {@code bytes} to the end of {@code text}.
 *
 * @param text  the text to extend
 * @param bytes the bytes to append, from the first to the last
 */
private void append(Text text, byte[] bytes) {
    final int count = bytes.length;
    text.append(bytes, 0, count);
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFMapper.java

License:Apache License

/**
 * Encodes {@code str} as UTF-8 and appends the resulting bytes to the end of
 * {@code text}.
 *
 * @param text the text to extend
 * @param str  the string whose UTF-8 bytes are appended
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private void append(Text text, String str) throws UnsupportedEncodingException {
    final byte[] encoded = str.getBytes("UTF-8");
    final int count = encoded.length;
    text.append(encoded, 0, count);
}

From source file:com.ricemap.spateDB.core.GridInfo.java

License:Apache License

/**
 * Writes the superclass's text form into {@code text}, then a comma, then
 * this object's {@code layers}, {@code columns} and {@code rows}.
 *
 * @param text the text to append to
 * @return the same {@code text} instance that was passed in
 */
@Override
public Text toText(Text text) {
    // Build the separator as a raw byte instead of calling String.getBytes(),
    // which encodes with the platform default charset.
    final byte[] comma = { (byte) ',' };
    super.toText(text);
    text.append(comma, 0, comma.length);
    // NOTE(review): the last argument is presumably the character written
    // after each value, with '\0' meaning none -- confirm against
    // TextSerializerHelper.serializeLong.
    TextSerializerHelper.serializeLong(layers, text, ',');
    TextSerializerHelper.serializeLong(columns, text, ',');
    TextSerializerHelper.serializeLong(rows, text, '\0');
    return text;
}