Example usage for org.apache.hadoop.io Text append

Introduction

On this page you can find example usages of the append method of org.apache.hadoop.io.Text.

Prototype

public void append(byte[] utf8, int start, int len)

Source Link

Document

Append a range of bytes to the end of the given text

Usage

From source file:com.ricemap.spateDB.core.Partition.java

License:Apache License

/**
 * Delegates to the superclass serialization and then appends a comma
 * followed by {@code filename} to the same text.
 *
 * @param text the text to append to
 * @return the {@code text} argument, after appending
 */
@Override
public Text toText(Text text) {
    super.toText(text);
    // Hadoop Text holds UTF-8 bytes, so encode explicitly instead of relying
    // on the platform default charset
    byte[] suffix = ("," + filename).getBytes(java.nio.charset.StandardCharsets.UTF_8);
    text.append(suffix, 0, suffix.length);
    return text;
}

From source file:com.ricemap.spateDB.io.Text2.java

License:Apache License

/**
 * Appends the bytes returned by {@code getBytes()}, up to
 * {@code getLength()}, to the supplied text.
 *
 * @param text destination that receives the bytes
 * @return the {@code text} argument
 */
@Override
public Text toText(Text text) {
    final byte[] content = getBytes();
    final int contentLength = getLength();
    text.append(content, 0, contentLength);
    return text;
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends hex representation of the given number to the given string.
 * If append is set to true, a comma is also appended to the text.
 * @param i/*from   w ww .  ja v  a2  s  . c om*/
 * @param t
 * @param appendComma
 */
public static void serializeHexLong(long i, Text t, char toAppend) {
    // Calculate number of bytes needed to serialize the given long
    int bytes_needed = 0;
    long temp;
    if (i < 0) {
        bytes_needed++; // An additional
        temp = -i;
    } else {
        temp = i;
    }
    do {
        bytes_needed += 1;
        temp >>>= 4;
    } while (temp != 0);

    if (toAppend != '\0')
        bytes_needed++;

    // Reserve the bytes needed in the text
    t.append(ToAppend, 0, bytes_needed);
    // Extract the underlying buffer array and fill it directly
    byte[] buffer = t.getBytes();
    // Position of the next character to write in the text
    int position = t.getLength() - 1;

    if (toAppend != '\0')
        buffer[position--] = (byte) toAppend;

    final int shift = 4;
    final int radix = 1 << shift;
    final long mask = radix - 1;

    // Negative sign is prepended separately for negative numbers
    boolean negative = false;
    if (i < 0) {
        i = -i;
        negative = true;
    }
    do {
        buffer[position--] = digits[(int) (i & mask)];
        i >>>= shift;
    } while (i != 0);
    if (negative)
        buffer[position--] = '-';
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends hex representation of the given number to the given string.
 * If append is set to true, a comma is also appended to the text.
 * @param i/*  www  . j  a  va 2  s .c  om*/
 * @param t
 * @param appendComma
 */
public static void serializeDouble(double d, Text t, char toAppend) {
    byte[] bytes = Double.toString(d).getBytes();
    t.append(bytes, 0, bytes.length);
    if (toAppend != '\0') {
        t.append(new byte[] { (byte) toAppend }, 0, 1);
    }
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends the decimal representation of the given number to the given text,
 * optionally followed by one extra character. A negative number is written
 * with a leading minus sign.
 *
 * @param i the number to serialize; every value including
 *   {@code Long.MIN_VALUE} is supported
 * @param t the text to append to
 * @param toAppend a character to write right after the number, or
 *   {@code '\0'} to write nothing after it
 */
public static void serializeLong(long i, Text t, char toAppend) {
    final boolean negative = i < 0;
    // Work on the non-positive form of the value: every long has one, whereas
    // negating Long.MIN_VALUE overflows and would yield negative digit indexes
    long remaining = negative ? i : -i;

    // Count the bytes: optional sign, one per decimal digit, optional trailing character
    int bytes_needed = negative ? 1 : 0;
    long temp = remaining;
    do {
        bytes_needed++;
        temp /= 10;
    } while (temp != 0);
    if (toAppend != '\0') {
        bytes_needed++;
    }

    // Reserve the bytes in the text; every reserved byte is overwritten below.
    // NOTE(review): ToAppend is declared outside this block - presumably a
    // scratch array at least bytes_needed long; confirm.
    t.append(ToAppend, 0, bytes_needed);
    // Fetch the backing array only after the append so the reserved bytes are included
    byte[] buffer = t.getBytes();
    // Fill from the last reserved byte backwards
    int position = t.getLength() - 1;

    if (toAppend != '\0') {
        buffer[position--] = (byte) toAppend;
    }
    do {
        // remaining % 10 lies in [-9, 0]; negate it to index the digit table
        buffer[position--] = digits[(int) -(remaining % 10)];
        remaining /= 10;
    } while (remaining != 0);
    if (negative) {
        buffer[position] = '-';
    }
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends the decimal representation of the given number to the given text,
 * optionally followed by one extra character. A negative number is written
 * with a leading minus sign.
 *
 * @param i the number to serialize; every value including
 *   {@code Integer.MIN_VALUE} is supported
 * @param t the text to append to
 * @param toAppend a character to write right after the number, or
 *   {@code '\0'} to write nothing after it
 */
public static void serializeInt(int i, Text t, char toAppend) {
    final boolean negative = i < 0;
    // Work on the non-positive form of the value: every int has one, whereas
    // negating Integer.MIN_VALUE overflows and would yield negative digit indexes
    int remaining = negative ? i : -i;

    // Count the bytes: optional sign, one per decimal digit, optional trailing character
    int bytes_needed = negative ? 1 : 0;
    int temp = remaining;
    do {
        bytes_needed++;
        temp /= 10;
    } while (temp != 0);
    if (toAppend != '\0') {
        bytes_needed++;
    }

    // Reserve the bytes in the text; every reserved byte is overwritten below.
    // NOTE(review): ToAppend is declared outside this block - presumably a
    // scratch array at least bytes_needed long; confirm.
    t.append(ToAppend, 0, bytes_needed);
    // Fetch the backing array only after the append so the reserved bytes are included
    byte[] buffer = t.getBytes();
    // Fill from the last reserved byte backwards
    int position = t.getLength() - 1;

    if (toAppend != '\0') {
        buffer[position--] = (byte) toAppend;
    }
    do {
        // remaining % 10 lies in [-9, 0]; negate it to index the digit table
        buffer[position--] = digits[-(remaining % 10)];
        remaining /= 10;
    } while (remaining != 0);
    if (negative) {
        buffer[position] = '-';
    }
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends the given map to the text as: the map-start byte, the entries
 * written as key, key/value separator byte, value and joined by the field
 * separator byte, then the map-end byte. Nothing is written for an empty or
 * {@code null} map.
 *
 * @param text the text to append to
 * @param tags the entries to serialize, in the iteration order of the map
 * @return the {@code text} argument
 */
public static Text serializeMap(Text text, Map<String, String> tags) {
    if (tags == null || tags.isEmpty()) {
        return text;
    }
    // Each marker is the single byte of Separators at the named offset
    text.append(Separators, MapStart, 1);
    boolean first = true;
    for (Map.Entry<String, String> entry : tags.entrySet()) {
        if (first) {
            // No separator before the first entry; the flag must stay false
            // afterwards so that every later entry is preceded by one
            first = false;
        } else {
            text.append(Separators, FieldSeparator, 1);
        }
        // Hadoop Text holds UTF-8 bytes, so encode explicitly instead of
        // relying on the platform default charset
        byte[] k = entry.getKey().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        text.append(k, 0, k.length);
        text.append(Separators, KeyValueSeparator, 1);
        byte[] v = entry.getValue().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        text.append(v, 0, v.length);
    }
    text.append(Separators, MapEnd, 1);
    return text;
}

From source file:com.ricemap.spateDB.mapred.SpatialRecordReader.java

License:Apache License

/**
 * Reads the next line from the input into {@code value}. Bytes still held in
 * {@code buffer} are consumed first; if they do not end with an end-of-line
 * character the line is completed from the underlying line reader.
 *
 * @param value receives the line that was read; it is cleared first
 * @return {@code true} if a line was read, {@code false} if the end of the
 *   stream or of this split was reached
 * @throws IOException if reading from the underlying stream fails
 */
protected boolean nextLine(Text value) throws IOException {
    if (blockType == BlockType.RTREE && pos == 8) {
        // File is positioned at the RTree header
        // Skip the header and go to first data object in file
        pos += RTree.skipHeader(in);
        LOG.info("Skipped R-tree to position: " + pos);
        // Reinitialize record reader at the new position
        lineReader = new LineReader(in);
    }
    while (getFilePosition() <= end) {
        value.clear();
        int b = 0;
        if (buffer != null) {
            // Read the first line encountered in buffer
            // NOTE(review): skipToEOL presumably returns the index just past
            // the first end-of-line in buffer - confirm against RTree
            int eol = RTree.skipToEOL(buffer, 0);
            b += eol;
            value.append(buffer, 0, eol);
            if (eol < buffer.length) {
                // Keep only the bytes that were not consumed, so the next call
                // continues after this line instead of re-reading it
                byte[] tmp = new byte[buffer.length - eol];
                System.arraycopy(buffer, eol, tmp, 0, tmp.length);
                buffer = tmp;
            } else {
                buffer = null;
            }
            // Check if a complete line has been read from the buffer; skip the
            // check when nothing was appended to avoid indexing position -1
            if (value.getLength() > 0) {
                byte last_byte = value.getBytes()[value.getLength() - 1];
                if (last_byte == '\n' || last_byte == '\r') {
                    return true;
                }
            }
        }

        // Read the first line from stream
        Text temp = new Text();
        b += lineReader.readLine(temp);
        if (b == 0) {
            // Indicates an end of stream
            return false;
        }
        pos += b;

        // Append the part read from stream to the part extracted from buffer
        value.append(temp.getBytes(), 0, temp.getLength());

        if (value.getLength() > 1) {
            // Read a non-empty line. Note that end-of-line character is included
            return true;
        }
    }
    // Reached end of file
    return false;
}

From source file:com.ricemap.spateDB.operations.Tail.java

License:Apache License

/**
 * Reads a maximum of n lines from the stream starting from its current
 * position and going backward./*w w  w .  ja  v a  2 s  .c om*/
 * 
 * @param in - An input stream. It'll be scanned from its current position
 *   backward till position 0
 * @param n - Maximum number of lines to return
 * @param stockObject - An object used to deserialize lines read. It can
 *   be set to <code>null</code> if output is also <code>null</code>. In this
 *   case, nothing is reported to the output.
 * @param output - An output collector used to report lines read.
 * @return - The position of the beginning of the earliest line read from
 *   buffer.
 * @throws IOException
 */
public static <T extends TextSerializable> long tail(FSDataInputStream in, int n, T stockObject,
        ResultCollector<T> output) throws IOException {
    int lines_read = 0;
    long end = in.getPos();
    long offset_of_last_eol = end;
    long last_read_byte = end;

    LongWritable line_offset = new LongWritable();
    Text read_line = new Text();
    Text remainder_from_last_buffer = new Text();
    byte[] buffer = new byte[4096];

    while (last_read_byte > 0 && lines_read < n) {
        // Read next chunk from the back
        long first_byte_to_read = (last_read_byte - 1) - (last_read_byte - 1) % buffer.length;
        in.seek(first_byte_to_read);
        int bytes_to_read = (int) (last_read_byte - first_byte_to_read);
        in.read(buffer, 0, bytes_to_read);
        last_read_byte = first_byte_to_read;

        // Iterate over bytes in this buffer
        int i_last_byte_consumed_in_buffer = bytes_to_read;
        int i_last_byte_examined_in_buffer = bytes_to_read;
        while (i_last_byte_examined_in_buffer > 0 && lines_read < n) {
            byte byte_examined = buffer[--i_last_byte_examined_in_buffer];
            if (byte_examined == '\n' || byte_examined == '\r') {
                // Found an end of line character
                // Report this to output unless it's empty
                long offset_of_this_eol = first_byte_to_read + i_last_byte_examined_in_buffer;
                if (offset_of_last_eol - offset_of_this_eol > 1) {
                    if (output != null) {
                        read_line.clear();
                        // +1 is to skip the EOL at the beginning
                        read_line.append(buffer, i_last_byte_examined_in_buffer + 1,
                                i_last_byte_consumed_in_buffer - (i_last_byte_examined_in_buffer + 1));
                        // Also append bytes remaining from last buffer
                        if (remainder_from_last_buffer.getLength() > 0) {
                            read_line.append(remainder_from_last_buffer.getBytes(), 0,
                                    remainder_from_last_buffer.getLength());
                        }
                        line_offset.set(offset_of_this_eol + 1);
                        stockObject.fromText(read_line);
                        output.collect(stockObject);
                    }
                    lines_read++;
                    remainder_from_last_buffer.clear();
                }
                i_last_byte_consumed_in_buffer = i_last_byte_examined_in_buffer;
                offset_of_last_eol = offset_of_this_eol;
            }
        }
        if (i_last_byte_consumed_in_buffer > 0) {
            // There are still some bytes not consumed in buffer
            if (remainder_from_last_buffer.getLength() == 0) {
                // Store whatever is remaining in remainder_from_last_buffer
                remainder_from_last_buffer.append(buffer, 0, i_last_byte_consumed_in_buffer);
            } else {
                // Prepend remaining bytes to Text
                Text t = new Text();
                t.append(buffer, 0, i_last_byte_consumed_in_buffer);
                t.append(remainder_from_last_buffer.getBytes(), 0, remainder_from_last_buffer.getLength());
                remainder_from_last_buffer = t;
            }
        }
    }

    if (lines_read < n && remainder_from_last_buffer.getLength() > 0) {
        // There is still one last line needs to be reported
        lines_read++;
        if (output != null) {
            read_line = remainder_from_last_buffer;
            line_offset.set(0);
            stockObject.fromText(read_line);
            output.collect(stockObject);
        }
        offset_of_last_eol = -1;
    }

    return offset_of_last_eol + 1;
}

From source file:com.tgam.hadoop.util.GenericEscapedLineReader.java

License:Apache License

/**
 * Read one line from the InputStream into the given Text.  A line
 * can be terminated by one of the following: '\n' (LF) , '\r' (CR),
 * or '\r\n' (CR+LF).  EOF also terminates an otherwise unterminated
 * line./*  w  w w.j  a v a  2  s  .co  m*/
 *
 * @param str the object to store the given line (without newline)
 * @param maxLineLength the maximum number of bytes to store into str;
 *  the rest of the line is silently discarded.
 * @param maxBytesToConsume the maximum number of bytes to consume
 *  in this call.  This is only a hint, because if the line cross
 *  this threshold, we allow it to happen.  It can overshoot
 *  potentially by as much as one buffer length.
 *
 * @return the number of bytes read including the (longest) newline
 * found.
 *
 * @throws IOException if the underlying stream throws
 */
public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /* We're reading data from in, but the head of the stream may be
    * already buffered in buffer, so we have several cases:
    * 1. No newline characters are in the buffer, so we need to copy
    *    everything and read another buffer from the stream.
    * 2. An unambiguously terminated line is in buffer, so we just
    *    copy to str.
    * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
    *    in CR.  In this case we copy everything up to CR to str, but
    *    we also need to see what follows CR: if it's LF, then we
    *    need consume LF as well, so next call to readLine will read
    *    from after that.
    * We use a flag prevCharCR to signal if previous character was CR
    * and, if it happens to be at the end of the buffer, delay
    * consuming it until we have a chance to look at the char that
    * follows.
    */
    str.clear();
    int txtLength = 0; //tracks str.getLength(), as an optimization
    int newlineLength = 0; //length of terminating newline
    boolean prevCharCR = false; //true of prev char was CR
    boolean prevCharEscape = false;
    long bytesConsumed = 0;
    do {
        int startPosn = bufferPosn; //starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            if (prevCharCR)
                ++bytesConsumed; //account for CR from previous read
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
            if (buffer[bufferPosn] == LF && !prevCharEscape) {
                newlineLength = (prevCharCR) ? 2 : 1;
                ++bufferPosn; // at next invocation proceed from following byte
                break;
            }
            if (prevCharCR) { //CR + notLF, we are at notLF
                newlineLength = 1;
                break;
            }

            prevCharCR = (buffer[bufferPosn] == CR && !prevCharEscape);
            prevCharEscape = (buffer[bufferPosn] == ESCAPE);
        }
        int readLength = bufferPosn - startPosn;
        if (prevCharCR && newlineLength == 0)
            --readLength; //CR at the end of the buffer
        bytesConsumed += readLength;
        int appendLength = readLength - newlineLength;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before newline: " + bytesConsumed);
    return (int) bytesConsumed;
}