List of usage examples for the method org.apache.hadoop.io.Text.append
public void append(byte[] utf8, int start, int len)
From source file:com.ricemap.spateDB.core.Partition.java
License:Apache License
/**
 * Serializes this object into the given text: the superclass serialization
 * is written first, followed by a comma and the file name.
 *
 * @param text the text to append to
 * @return the same {@code text} instance that was passed in
 */
@Override
public Text toText(Text text) {
    super.toText(text);
    // Text.append takes UTF-8 bytes, so encode explicitly instead of relying
    // on the platform default charset (which may not be UTF-8).
    byte[] suffix = ("," + filename).getBytes(java.nio.charset.StandardCharsets.UTF_8);
    text.append(suffix, 0, suffix.length);
    return text;
}
From source file:com.ricemap.spateDB.io.Text2.java
License:Apache License
/**
 * Appends the bytes currently held by this object to the given text.
 *
 * @param text the text to append to
 * @return the same {@code text} instance that was passed in
 */
@Override
public Text toText(Text text) {
    final byte[] content = getBytes();
    final int contentLength = getLength();
    // Only the first contentLength bytes of the array are copied.
    text.append(content, 0, contentLength);
    return text;
}
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
/** * Appends hex representation of the given number to the given string. * If append is set to true, a comma is also appended to the text. * @param i/*from w ww . ja v a2 s . c om*/ * @param t * @param appendComma */ public static void serializeHexLong(long i, Text t, char toAppend) { // Calculate number of bytes needed to serialize the given long int bytes_needed = 0; long temp; if (i < 0) { bytes_needed++; // An additional temp = -i; } else { temp = i; } do { bytes_needed += 1; temp >>>= 4; } while (temp != 0); if (toAppend != '\0') bytes_needed++; // Reserve the bytes needed in the text t.append(ToAppend, 0, bytes_needed); // Extract the underlying buffer array and fill it directly byte[] buffer = t.getBytes(); // Position of the next character to write in the text int position = t.getLength() - 1; if (toAppend != '\0') buffer[position--] = (byte) toAppend; final int shift = 4; final int radix = 1 << shift; final long mask = radix - 1; // Negative sign is prepended separately for negative numbers boolean negative = false; if (i < 0) { i = -i; negative = true; } do { buffer[position--] = digits[(int) (i & mask)]; i >>>= shift; } while (i != 0); if (negative) buffer[position--] = '-'; }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
/** * Appends hex representation of the given number to the given string. * If append is set to true, a comma is also appended to the text. * @param i/* www . j a va 2 s .c om*/ * @param t * @param appendComma */ public static void serializeDouble(double d, Text t, char toAppend) { byte[] bytes = Double.toString(d).getBytes(); t.append(bytes, 0, bytes.length); if (toAppend != '\0') { t.append(new byte[] { (byte) toAppend }, 0, 1); } }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
public static void serializeLong(long i, Text t, char toAppend) { // Calculate number of bytes needed to serialize the given long int bytes_needed = 0; long temp;/*w ww. j av a 2 s . c om*/ if (i < 0) { bytes_needed++; // An additional temp = -i; } else { temp = i; } do { bytes_needed += 1; temp /= 10; } while (temp != 0); if (toAppend != '\0') bytes_needed++; // Reserve the bytes needed in the text t.append(ToAppend, 0, bytes_needed); // Extract the underlying buffer array and fill it directly byte[] buffer = t.getBytes(); // Position of the next character to write in the text int position = t.getLength() - 1; if (toAppend != '\0') buffer[position--] = (byte) toAppend; // Negative sign is prepended separately for negative numbers boolean negative = false; if (i < 0) { i = -i; negative = true; } do { int digit = (int) (i % 10); buffer[position--] = digits[digit]; i /= 10; } while (i != 0); if (negative) buffer[position--] = '-'; }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
public static void serializeInt(int i, Text t, char toAppend) { // Calculate number of bytes needed to serialize the given long int bytes_needed = 0; int temp;//from w ww. j a v a 2 s .co m if (i < 0) { bytes_needed++; // An additional temp = -i; } else { temp = i; } do { bytes_needed += 1; temp /= 10; } while (temp != 0); if (toAppend != '\0') bytes_needed++; // Reserve the bytes needed in the text t.append(ToAppend, 0, bytes_needed); // Extract the underlying buffer array and fill it directly byte[] buffer = t.getBytes(); // Position of the next character to write in the text int position = t.getLength() - 1; if (toAppend != '\0') buffer[position--] = (byte) toAppend; // Negative sign is prepended separately for negative numbers boolean negative = false; if (i < 0) { i = -i; negative = true; } do { int digit = i % 10; buffer[position--] = digits[digit]; i /= 10; } while (i != 0); if (negative) buffer[position--] = '-'; }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
public static Text serializeMap(Text text, Map<String, String> tags) { if (!tags.isEmpty()) { boolean first = true; text.append(Separators, MapStart, 1); for (Map.Entry<String, String> entry : tags.entrySet()) { if (first) { first = false;// w w w.j a v a 2 s . com } else { first = true; text.append(Separators, FieldSeparator, 1); } byte[] k = entry.getKey().getBytes(); text.append(k, 0, k.length); text.append(Separators, KeyValueSeparator, 1); byte[] v = entry.getValue().getBytes(); text.append(v, 0, v.length); } text.append(Separators, MapEnd, 1); } return text; }
From source file:com.ricemap.spateDB.mapred.SpatialRecordReader.java
License:Apache License
/** * Reads the next line from input and return true if a line was read. * If no more lines are available in this split, a false is returned. * @param value//from ww w .j a v a2 s.c om * @return * @throws IOException */ protected boolean nextLine(Text value) throws IOException { if (blockType == BlockType.RTREE && pos == 8) { // File is positioned at the RTree header // Skip the header and go to first data object in file pos += RTree.skipHeader(in); LOG.info("Skipped R-tree to position: " + pos); // Reinitialize record reader at the new position lineReader = new LineReader(in); } while (getFilePosition() <= end) { value.clear(); int b = 0; if (buffer != null) { // Read the first line encountered in buffer int eol = RTree.skipToEOL(buffer, 0); b += eol; value.append(buffer, 0, eol); if (eol < buffer.length) { // There are still some bytes remaining in buffer byte[] tmp = new byte[buffer.length - eol]; System.arraycopy(buffer, eol, tmp, 0, tmp.length); } else { buffer = null; } // Check if a complete line has been read from the buffer byte last_byte = value.getBytes()[value.getLength() - 1]; if (last_byte == '\n' || last_byte == '\r') return true; } // Read the first line from stream Text temp = new Text(); b += lineReader.readLine(temp); if (b == 0) { // Indicates an end of stream return false; } pos += b; // Append the part read from stream to the part extracted from buffer value.append(temp.getBytes(), 0, temp.getLength()); if (value.getLength() > 1) { // Read a non-empty line. Note that end-of-line character is included return true; } } // Reached end of file return false; }
From source file:com.ricemap.spateDB.operations.Tail.java
License:Apache License
/** * Reads a maximum of n lines from the stream starting from its current * position and going backward./*w w w . ja v a 2 s .c om*/ * * @param in - An input stream. It'll be scanned from its current position * backward till position 0 * @param n - Maximum number of lines to return * @param stockObject - An object used to deserialize lines read. It can * be set to <code>null</code> if output is also <code>null</code>. In this * case, nothing is reported to the output. * @param output - An output collector used to report lines read. * @return - The position of the beginning of the earliest line read from * buffer. * @throws IOException */ public static <T extends TextSerializable> long tail(FSDataInputStream in, int n, T stockObject, ResultCollector<T> output) throws IOException { int lines_read = 0; long end = in.getPos(); long offset_of_last_eol = end; long last_read_byte = end; LongWritable line_offset = new LongWritable(); Text read_line = new Text(); Text remainder_from_last_buffer = new Text(); byte[] buffer = new byte[4096]; while (last_read_byte > 0 && lines_read < n) { // Read next chunk from the back long first_byte_to_read = (last_read_byte - 1) - (last_read_byte - 1) % buffer.length; in.seek(first_byte_to_read); int bytes_to_read = (int) (last_read_byte - first_byte_to_read); in.read(buffer, 0, bytes_to_read); last_read_byte = first_byte_to_read; // Iterate over bytes in this buffer int i_last_byte_consumed_in_buffer = bytes_to_read; int i_last_byte_examined_in_buffer = bytes_to_read; while (i_last_byte_examined_in_buffer > 0 && lines_read < n) { byte byte_examined = buffer[--i_last_byte_examined_in_buffer]; if (byte_examined == '\n' || byte_examined == '\r') { // Found an end of line character // Report this to output unless it's empty long offset_of_this_eol = first_byte_to_read + i_last_byte_examined_in_buffer; if (offset_of_last_eol - offset_of_this_eol > 1) { if (output != null) { read_line.clear(); // +1 is to skip the EOL at the beginning 
read_line.append(buffer, i_last_byte_examined_in_buffer + 1, i_last_byte_consumed_in_buffer - (i_last_byte_examined_in_buffer + 1)); // Also append bytes remaining from last buffer if (remainder_from_last_buffer.getLength() > 0) { read_line.append(remainder_from_last_buffer.getBytes(), 0, remainder_from_last_buffer.getLength()); } line_offset.set(offset_of_this_eol + 1); stockObject.fromText(read_line); output.collect(stockObject); } lines_read++; remainder_from_last_buffer.clear(); } i_last_byte_consumed_in_buffer = i_last_byte_examined_in_buffer; offset_of_last_eol = offset_of_this_eol; } } if (i_last_byte_consumed_in_buffer > 0) { // There are still some bytes not consumed in buffer if (remainder_from_last_buffer.getLength() == 0) { // Store whatever is remaining in remainder_from_last_buffer remainder_from_last_buffer.append(buffer, 0, i_last_byte_consumed_in_buffer); } else { // Prepend remaining bytes to Text Text t = new Text(); t.append(buffer, 0, i_last_byte_consumed_in_buffer); t.append(remainder_from_last_buffer.getBytes(), 0, remainder_from_last_buffer.getLength()); remainder_from_last_buffer = t; } } } if (lines_read < n && remainder_from_last_buffer.getLength() > 0) { // There is still one last line needs to be reported lines_read++; if (output != null) { read_line = remainder_from_last_buffer; line_offset.set(0); stockObject.fromText(read_line); output.collect(stockObject); } offset_of_last_eol = -1; } return offset_of_last_eol + 1; }
From source file:com.tgam.hadoop.util.GenericEscapedLineReader.java
License:Apache License
/**
 * Read one line from the InputStream into the given Text.  A line
 * can be terminated by one of the following: '\n' (LF) , '\r' (CR),
 * or '\r\n' (CR+LF).  EOF also terminates an otherwise unterminated
 * line. An LF or CR that immediately follows an ESCAPE byte is not
 * treated as a line terminator; it is kept as part of the line, and the
 * ESCAPE byte itself is copied into str unchanged.
 *
 * @param str the object to store the given line (without newline)
 * @param maxLineLength the maximum number of bytes to store into str;
 *  the rest of the line is silently discarded.
 * @param maxBytesToConsume the maximum number of bytes to consume
 *  in this call.  This is only a hint, because if the line cross
 *  this threshold, we allow it to happen.  It can overshoot
 *  potentially by as much as one buffer length.
 *
 * @return the number of bytes read including the (longest) newline
 * found.
 *
 * @throws IOException if the underlying stream throws, or if more than
 *  Integer.MAX_VALUE bytes were consumed by this call
 */
public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /* We're reading data from in, but the head of the stream may be
     * already buffered in buffer, so we have several cases:
     * 1. No newline characters are in the buffer, so we need to copy
     *    everything and read another buffer from the stream.
     * 2. An unambiguously terminated line is in buffer, so we just
     *    copy to str.
     * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
     *    in CR.  In this case we copy everything up to CR to str, but
     *    we also need to see what follows CR: if it's LF, then we
     *    need consume LF as well, so next call to readLine will read
     *    from after that.
     * We use a flag prevCharCR to signal if previous character was CR
     * and, if it happens to be at the end of the buffer, delay
     * consuming it until we have a chance to look at the char that
     * follows.
     */
    str.clear();
    int txtLength = 0; //tracks str.getLength(), as an optimization
    int newlineLength = 0; //length of terminating newline
    boolean prevCharCR = false; //true if prev char was CR (and that CR was not escaped)
    // true if prev char was ESCAPE. Starts false on every call and is kept
    // across buffer refills within the same call.
    boolean prevCharEscape = false;
    long bytesConsumed = 0;
    do {
        int startPosn = bufferPosn; //starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            // Buffer exhausted: refill it from the stream
            startPosn = bufferPosn = 0;
            if (prevCharCR)
                ++bytesConsumed; //account for CR from previous read
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
            // An LF only ends the line when it is not escaped
            if (buffer[bufferPosn] == LF && !prevCharEscape) {
                newlineLength = (prevCharCR) ? 2 : 1;
                ++bufferPosn; // at next invocation proceed from following byte
                break;
            }
            if (prevCharCR) { //CR + notLF, we are at notLF
                newlineLength = 1;
                break;
            }
            // A CR only counts as a potential terminator when it is not escaped
            prevCharCR = (buffer[bufferPosn] == CR && !prevCharEscape);
            // NOTE(review): every ESCAPE byte sets this flag, including one
            // that itself follows an ESCAPE, so an escaped ESCAPE followed
            // by LF does not end the line either - confirm this is intended.
            prevCharEscape = (buffer[bufferPosn] == ESCAPE);
        }
        int readLength = bufferPosn - startPosn;
        if (prevCharCR && newlineLength == 0)
            --readLength; //CR at the end of the buffer
        bytesConsumed += readLength;
        // Exclude the terminator and clip to the space left under maxLineLength
        int appendLength = readLength - newlineLength;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

    // The count is returned as an int, so refuse to truncate it
    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before newline: " + bytesConsumed);
    return (int) bytesConsumed;
}