List of usage examples for the append method of org.apache.hadoop.io.Text
public void append(byte[] utf8, int start, int len)
From source file:com.asakusafw.runtime.io.TsvParser.java
License:Apache License
/**
 * Moves the bytes accumulated in {@code encodeBuffer} onto the end of {@code text}
 * and resets the buffer so it can be filled again.
 *
 * @param text the destination that receives the buffered bytes
 */
private void consumeEncoded(Text text) {
    encodeBuffer.flip();
    if (encodeBuffer.hasRemaining()) {
        // Text.append takes (array, start, length). Use remaining() for the length and
        // honour arrayOffset() so the call stays correct even if the buffer is a slice
        // or its position is not zero.
        text.append(
                encodeBuffer.array(),
                encodeBuffer.arrayOffset() + encodeBuffer.position(),
                encodeBuffer.remaining());
    }
    encodeBuffer.clear();
}
From source file:com.ashishpaliwal.hadoop.utils.inputformat.CsvLineReader.java
License:Apache License
/** * Read from the InputStream into the given Text. * * @param txt the object to store the given line * @param maxLineLength the maximum number of bytes to store into txt. * @param maxBytesToConsume the maximum number of bytes to consume in this * call./*w w w.ja v a2 s. c o m*/ * @return the number of bytes read including the newline * @throws IOException if the underlying stream throws */ public int readLine(Text txt, int maxLineLength, int maxBytesToConsume) throws IOException { txt.clear(); boolean hadFinalNewline = false; boolean hadFinalReturn = false; boolean hitEndOfFile = false; int startPosn = bufferPosn; long bytesConsumed = 0; boolean inQuote = false; boolean isLastCharEscapeChar = false; outerLoop: while (true) { if (bufferPosn >= bufferLength) { if (!backfill()) { hitEndOfFile = true; break; } } startPosn = bufferPosn; for (; bufferPosn < bufferLength; ++bufferPosn) { switch (buffer[bufferPosn]) { case '\\': isLastCharEscapeChar = !isLastCharEscapeChar; break; case '"': if (!inQuote && hadFinalReturn) { break outerLoop; } if (!isLastCharEscapeChar) { inQuote = !inQuote; } isLastCharEscapeChar = false; break; case '\n': isLastCharEscapeChar = false; if (!inQuote) { hadFinalNewline = true; bufferPosn += 1; break outerLoop; } break; case '\r': isLastCharEscapeChar = false; if (!inQuote) { if (hadFinalReturn) { // leave this \r in the stream, so we'll get it next time break outerLoop; } hadFinalReturn = true; } break; default: isLastCharEscapeChar = false; if (!inQuote && hadFinalReturn) { break outerLoop; } } } bytesConsumed += bufferPosn - startPosn; int length = bufferPosn - startPosn - (hadFinalReturn ? 1 : 0); length = Math.min(length, maxLineLength - txt.getLength()); if (length >= 0) txt.append(buffer, startPosn, length); if (bytesConsumed >= maxBytesToConsume) return (int) Math.min(bytesConsumed, (long) Integer.MAX_VALUE); } int newlineLength = (hadFinalNewline ? 1 : 0) + (hadFinalReturn ? 
1 : 0); if (!hitEndOfFile) { bytesConsumed += bufferPosn - startPosn; int length = bufferPosn - startPosn - newlineLength; length = Math.min(length, maxLineLength - txt.getLength()); if (length > 0) txt.append(buffer, startPosn, length); } return (int) Math.min(bytesConsumed, (long) Integer.MAX_VALUE); }
From source file:com.blm.orc.DynamicByteArray.java
License:Apache License
/** * Set a text value from the bytes in this dynamic array. * @param result the value to set/*w w w .j a va 2s . c o m*/ * @param offset the start of the bytes to copy * @param length the number of bytes to copy */ public void setText(Text result, int offset, int length) { result.clear(); int currentChunk = offset / chunkSize; int currentOffset = offset % chunkSize; int currentLength = Math.min(length, chunkSize - currentOffset); while (length > 0) { result.append(data[currentChunk], currentOffset, currentLength); length -= currentLength; currentChunk += 1; currentOffset = 0; currentLength = Math.min(length, chunkSize - currentOffset); } }
From source file:com.dinglicom.clouder.mapreduce.input.LineReader.java
License:Apache License
/** * Read a line terminated by one of CR, LF, or CRLF. *///w w w .j a v a 2 s .com private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { /* We're reading data from in, but the head of the stream may be * already buffered in buffer, so we have several cases: * 1. No newline characters are in the buffer, so we need to copy * everything and read another buffer from the stream. * 2. An unambiguously terminated line is in buffer, so we just * copy to str. * 3. Ambiguously terminated line is in buffer, i.e. buffer ends * in CR. In this case we copy everything up to CR to str, but * we also need to see what follows CR: if it's LF, then we * need consume LF as well, so next call to readLine will read * from after that. * We use a flag prevCharCR to signal if previous character was CR * and, if it happens to be at the end of the buffer, delay * consuming it until we have a chance to look at the char that * follows. */ str.clear(); int txtLength = 0; //tracks str.getLength(), as an optimization int newlineLength = 0; //length of terminating newline boolean prevCharCR = false; //true of prev char was CR long bytesConsumed = 0; do { int startPosn = bufferPosn; //starting from where we left off the last time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; if (prevCharCR) ++bytesConsumed; //account for CR from previous read bufferLength = in.read(buffer); if (bufferLength <= 0) break; // EOF } for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline if (buffer[bufferPosn] == LF) { newlineLength = (prevCharCR) ? 
2 : 1; ++bufferPosn; // at next invocation proceed from following byte break; } if (prevCharCR) { //CR + notLF, we are at notLF newlineLength = 1; break; } prevCharCR = (buffer[bufferPosn] == CR); } int readLength = bufferPosn - startPosn; if (prevCharCR && newlineLength == 0) --readLength; //CR at the end of the buffer bytesConsumed += readLength; int appendLength = readLength - newlineLength; if (appendLength > maxLineLength - txtLength) { appendLength = maxLineLength - txtLength; } if (appendLength > 0) { str.append(buffer, startPosn, appendLength); txtLength += appendLength; } } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); if (bytesConsumed > (long) Integer.MAX_VALUE) throw new IOException("Too many bytes before newline: " + bytesConsumed); return (int) bytesConsumed; }
From source file:com.dinglicom.clouder.mapreduce.input.LineReader.java
License:Apache License
/** * Read a line terminated by a custom delimiter. *///w w w . j a v a 2s . c o m private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { str.clear(); int txtLength = 0; // tracks str.getLength(), as an optimization long bytesConsumed = 0; int delPosn = 0; do { int startPosn = bufferPosn; // starting from where we left off the last // time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; bufferLength = in.read(buffer); if (bufferLength <= 0) break; // EOF } for (; bufferPosn < bufferLength; ++bufferPosn) { if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) { delPosn++; if (delPosn >= recordDelimiterBytes.length) { bufferPosn++; break; } } else { delPosn = 0; } } int readLength = bufferPosn - startPosn; bytesConsumed += readLength; int appendLength = readLength - delPosn; if (appendLength > maxLineLength - txtLength) { appendLength = maxLineLength - txtLength; } if (appendLength > 0) { str.append(buffer, startPosn, appendLength); txtLength += appendLength; } } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume); if (bytesConsumed > (long) Integer.MAX_VALUE) throw new IOException("Too many bytes before delimiter: " + bytesConsumed); return (int) bytesConsumed; }
From source file:com.ery.hadoop.mrddx.file.LineReaders.java
License:Apache License
/** * Read a line terminated by one of CR, LF, or CRLF. *///from w w w.ja va2s . co m private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { /* * We're reading data from in, but the head of the stream may be already * buffered in buffer, so we have several cases: 1. No newline * characters are in the buffer, so we need to copy everything and read * another buffer from the stream. 2. An unambiguously terminated line * is in buffer, so we just copy to str. 3. Ambiguously terminated line * is in buffer, i.e. buffer ends in CR. In this case we copy everything * up to CR to str, but we also need to see what follows CR: if it's LF, * then we need consume LF as well, so next call to readLine will read * from after that. We use a flag prevCharCR to signal if previous * character was CR and, if it happens to be at the end of the buffer, * delay consuming it until we have a chance to look at the char that * follows. */ str.clear(); int txtLength = 0; // tracks str.getLength(), as an optimization int newlineLength = 0; // length of terminating newline boolean prevCharCR = false; // true of prev char was CR long bytesConsumed = 0; do { int startPosn = bufferPosn; // starting from where we left off the // last time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; if (prevCharCR) ++bytesConsumed; // account for CR from previous read bufferLength = in.read(buffer); if (bufferLength <= 0) break; // EOF } for (; bufferPosn < bufferLength; ++bufferPosn) { // search for // newline if (buffer[bufferPosn] == LF) { newlineLength = (prevCharCR) ? 
2 : 1; ++bufferPosn; // at next invocation proceed from following // byte break; } if (prevCharCR) { // CR + notLF, we are at notLF newlineLength = 1; break; } prevCharCR = (buffer[bufferPosn] == CR); } int readLength = bufferPosn - startPosn; if (prevCharCR && newlineLength == 0) --readLength; // CR at the end of the buffer bytesConsumed += readLength; int appendLength = readLength - newlineLength; if (appendLength > maxLineLength - txtLength) { appendLength = maxLineLength - txtLength; } if (appendLength > 0) { str.append(buffer, startPosn, appendLength); txtLength += appendLength; } } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); if (bytesConsumed > (long) Integer.MAX_VALUE) throw new IOException("Too many bytes before newline: " + bytesConsumed); return (int) bytesConsumed; }
From source file:com.ery.hadoop.mrddx.file.LineReaders.java
License:Apache License
/** * Read a line terminated by a custom delimiter. *///from w ww . j av a 2 s .com private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { str.clear(); int txtLength = 0; // tracks str.getLength(), as an optimization long bytesConsumed = 0; int delPosn = 0; do { int startPosn = bufferPosn; // starting from where we left off the // last // time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; bufferLength = in.read(buffer); if (bufferLength <= 0) break; // EOF } for (; bufferPosn < bufferLength; ++bufferPosn) { if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) { delPosn++; if (delPosn >= recordDelimiterBytes.length) { bufferPosn++; break; } } else { delPosn = 0; } } int readLength = bufferPosn - startPosn; bytesConsumed += readLength; int appendLength = readLength - delPosn; if (appendLength > maxLineLength - txtLength) { appendLength = maxLineLength - txtLength; } if (appendLength > 0) { str.append(buffer, startPosn, appendLength); txtLength += appendLength; } } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume); if (bytesConsumed > (long) Integer.MAX_VALUE) throw new IOException("Too many bytes before delimiter: " + bytesConsumed); return (int) bytesConsumed; }
From source file:com.kasabi.labs.freebase.mr.Freebase2RDFMapper.java
License:Apache License
/**
 * Appends every byte of {@code bytes} to the end of {@code text}.
 *
 * @param text the destination
 * @param bytes the bytes to add, taken from index 0 through the end of the array
 */
private void append(Text text, byte[] bytes) {
    final int count = bytes.length;
    text.append(bytes, 0, count);
}
From source file:com.kasabi.labs.freebase.mr.Freebase2RDFMapper.java
License:Apache License
/**
 * Encodes {@code str} as UTF-8 and appends the resulting bytes to {@code text}.
 *
 * @param text the destination
 * @param str the string to encode and add
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private void append(Text text, String str) throws UnsupportedEncodingException {
    final byte[] encoded = str.getBytes("UTF-8");
    final int count = encoded.length;
    text.append(encoded, 0, count);
}
From source file:com.ricemap.spateDB.core.GridInfo.java
License:Apache License
@Override public Text toText(Text text) { final byte[] Comma = ",".getBytes(); super.toText(text); text.append(Comma, 0, Comma.length); TextSerializerHelper.serializeLong(layers, text, ','); TextSerializerHelper.serializeLong(columns, text, ','); TextSerializerHelper.serializeLong(rows, text, '\0'); return text;//from w ww . jav a 2 s . co m }