List of usage examples for org.apache.hadoop.io Text clear
public void clear()
From source file:com.blm.orc.DynamicByteArray.java
License:Apache License
/** * Set a text value from the bytes in this dynamic array. * @param result the value to set//from w w w . j av a 2 s . com * @param offset the start of the bytes to copy * @param length the number of bytes to copy */ public void setText(Text result, int offset, int length) { result.clear(); int currentChunk = offset / chunkSize; int currentOffset = offset % chunkSize; int currentLength = Math.min(length, chunkSize - currentOffset); while (length > 0) { result.append(data[currentChunk], currentOffset, currentLength); length -= currentLength; currentChunk += 1; currentOffset = 0; currentLength = Math.min(length, chunkSize - currentOffset); } }
From source file:com.dinglicom.clouder.mapreduce.input.LineReader.java
License:Apache License
/** * Read a line terminated by one of CR, LF, or CRLF. */// w w w .j a v a2 s . co m private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { /* We're reading data from in, but the head of the stream may be * already buffered in buffer, so we have several cases: * 1. No newline characters are in the buffer, so we need to copy * everything and read another buffer from the stream. * 2. An unambiguously terminated line is in buffer, so we just * copy to str. * 3. Ambiguously terminated line is in buffer, i.e. buffer ends * in CR. In this case we copy everything up to CR to str, but * we also need to see what follows CR: if it's LF, then we * need consume LF as well, so next call to readLine will read * from after that. * We use a flag prevCharCR to signal if previous character was CR * and, if it happens to be at the end of the buffer, delay * consuming it until we have a chance to look at the char that * follows. */ str.clear(); int txtLength = 0; //tracks str.getLength(), as an optimization int newlineLength = 0; //length of terminating newline boolean prevCharCR = false; //true of prev char was CR long bytesConsumed = 0; do { int startPosn = bufferPosn; //starting from where we left off the last time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; if (prevCharCR) ++bytesConsumed; //account for CR from previous read bufferLength = in.read(buffer); if (bufferLength <= 0) break; // EOF } for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline if (buffer[bufferPosn] == LF) { newlineLength = (prevCharCR) ? 2 : 1; ++bufferPosn; // at next invocation proceed from following byte break; } if (prevCharCR) { //CR + notLF, we are at notLF newlineLength = 1; break; } prevCharCR = (buffer[bufferPosn] == CR); } int readLength = bufferPosn - startPosn; if (prevCharCR && newlineLength == 0) --readLength; //CR at the end of the buffer bytesConsumed += readLength; int appendLength = readLength - newlineLength; if (appendLength > maxLineLength - txtLength) { appendLength = maxLineLength - txtLength; } if (appendLength > 0) { str.append(buffer, startPosn, appendLength); txtLength += appendLength; } } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); if (bytesConsumed > (long) Integer.MAX_VALUE) throw new IOException("Too many bytes before newline: " + bytesConsumed); return (int) bytesConsumed; }
From source file:com.dinglicom.clouder.mapreduce.input.LineReader.java
License:Apache License
/** * Read a line terminated by a custom delimiter. *//*from w ww. j a v a2 s . com*/ private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { str.clear(); int txtLength = 0; // tracks str.getLength(), as an optimization long bytesConsumed = 0; int delPosn = 0; do { int startPosn = bufferPosn; // starting from where we left off the last // time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; bufferLength = in.read(buffer); if (bufferLength <= 0) break; // EOF } for (; bufferPosn < bufferLength; ++bufferPosn) { if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) { delPosn++; if (delPosn >= recordDelimiterBytes.length) { bufferPosn++; break; } } else { delPosn = 0; } } int readLength = bufferPosn - startPosn; bytesConsumed += readLength; int appendLength = readLength - delPosn; if (appendLength > maxLineLength - txtLength) { appendLength = maxLineLength - txtLength; } if (appendLength > 0) { str.append(buffer, startPosn, appendLength); txtLength += appendLength; } } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume); if (bytesConsumed > (long) Integer.MAX_VALUE) throw new IOException("Too many bytes before delimiter: " + bytesConsumed); return (int) bytesConsumed; }
From source file:com.ery.hadoop.mrddx.file.LineReaders.java
License:Apache License
/** * Read a line terminated by one of CR, LF, or CRLF. *///from w w w .ja v a2 s. co m private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { /* * We're reading data from in, but the head of the stream may be already * buffered in buffer, so we have several cases: 1. No newline * characters are in the buffer, so we need to copy everything and read * another buffer from the stream. 2. An unambiguously terminated line * is in buffer, so we just copy to str. 3. Ambiguously terminated line * is in buffer, i.e. buffer ends in CR. In this case we copy everything * up to CR to str, but we also need to see what follows CR: if it's LF, * then we need consume LF as well, so next call to readLine will read * from after that. We use a flag prevCharCR to signal if previous * character was CR and, if it happens to be at the end of the buffer, * delay consuming it until we have a chance to look at the char that * follows. */ str.clear(); int txtLength = 0; // tracks str.getLength(), as an optimization int newlineLength = 0; // length of terminating newline boolean prevCharCR = false; // true of prev char was CR long bytesConsumed = 0; do { int startPosn = bufferPosn; // starting from where we left off the // last time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; if (prevCharCR) ++bytesConsumed; // account for CR from previous read bufferLength = in.read(buffer); if (bufferLength <= 0) break; // EOF } for (; bufferPosn < bufferLength; ++bufferPosn) { // search for // newline if (buffer[bufferPosn] == LF) { newlineLength = (prevCharCR) ? 2 : 1; ++bufferPosn; // at next invocation proceed from following // byte break; } if (prevCharCR) { // CR + notLF, we are at notLF newlineLength = 1; break; } prevCharCR = (buffer[bufferPosn] == CR); } int readLength = bufferPosn - startPosn; if (prevCharCR && newlineLength == 0) --readLength; // CR at the end of the buffer bytesConsumed += readLength; int appendLength = readLength - newlineLength; if (appendLength > maxLineLength - txtLength) { appendLength = maxLineLength - txtLength; } if (appendLength > 0) { str.append(buffer, startPosn, appendLength); txtLength += appendLength; } } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); if (bytesConsumed > (long) Integer.MAX_VALUE) throw new IOException("Too many bytes before newline: " + bytesConsumed); return (int) bytesConsumed; }
From source file:com.ery.hadoop.mrddx.file.LineReaders.java
License:Apache License
/** * Read a line terminated by a custom delimiter. *//*from w w w .jav a2 s .c o m*/ private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { str.clear(); int txtLength = 0; // tracks str.getLength(), as an optimization long bytesConsumed = 0; int delPosn = 0; do { int startPosn = bufferPosn; // starting from where we left off the // last // time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; bufferLength = in.read(buffer); if (bufferLength <= 0) break; // EOF } for (; bufferPosn < bufferLength; ++bufferPosn) { if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) { delPosn++; if (delPosn >= recordDelimiterBytes.length) { bufferPosn++; break; } } else { delPosn = 0; } } int readLength = bufferPosn - startPosn; bytesConsumed += readLength; int appendLength = readLength - delPosn; if (appendLength > maxLineLength - txtLength) { appendLength = maxLineLength - txtLength; } if (appendLength > 0) { str.append(buffer, startPosn, appendLength); txtLength += appendLength; } } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume); if (bytesConsumed > (long) Integer.MAX_VALUE) throw new IOException("Too many bytes before delimiter: " + bytesConsumed); return (int) bytesConsumed; }
From source file:com.facebook.hive.orc.DynamicByteArray.java
License:Open Source License
/** * Set a text value from the bytes in this dynamic array. * @param result the value to set//from w w w. ja v a 2s . c o m * @param offset the start of the bytes to copy * @param length the number of bytes to copy */ public void setText(Text result, int offset, int length) { result.clear(); result.set(data.getBytes(), offset, length); }
From source file:com.facebook.hive.orc.lazy.LazyStringDictionaryTreeReader.java
License:Open Source License
private void nextFromDictionary(Text result) throws IOException { int entry = (int) reader.next(); int offset = dictionaryOffsets[entry]; int length = dictionaryOffsets[entry + 1] - dictionaryOffsets[entry]; // If the column is just empty strings, the size will be zero, so the buffer will be null, // in that case just return result as it will default to empty if (dictionaryBuffer != null) { dictionaryBuffer.setText(result, offset, length); } else {//from w w w . j ava 2s. co m result.clear(); } }
From source file:com.facebook.hive.orc.lazy.LazyStringDictionaryTreeReader.java
License:Open Source License
private void nextFromStrideDictionary(Text result) throws IOException { int indexEntry = computeRowIndexEntry(previousRow); if (indexEntry != currentUnitDictionary) { loadStrideDictionary(indexEntry); }/*from ww w .j a v a2s . co m*/ int entry = (int) reader.next(); int offset = strideDictionaryOffsets[entry]; int length; // if it isn't the last entry, subtract the offsets otherwise use // the buffer length. if (entry < strideDictionaryOffsets.length - 1) { length = strideDictionaryOffsets[entry + 1] - offset; } else { length = strideDictionaryBuffer.size() - offset; } // If the column is just empty strings, the size will be zero, so the buffer will be null, // in that case just return result as it will default to empty if (strideDictionaryBuffer != null) { strideDictionaryBuffer.setText(result, offset, length); } else { result.clear(); } }
From source file:com.ricemap.spateDB.core.RTree.java
License:Apache License
/** * Builds the RTree given a serialized list of elements. It uses the given * stockObject to deserialize these elements and build the tree. Also writes * the created tree to the disk directly. * //ww w . j a v a 2 s.co m * @param elements * - serialization of elements to be written * @param offset * - index of the first element to use in the elements array * @param len * - number of bytes to user from the elements array * @param bytesAvailable * - size available (in bytes) to store the tree structures * @param dataOut * - an output to use for writing the tree to * @param fast_sort * - setting this to <code>true</code> allows the method to run * faster by materializing the offset of each element in the list * which speeds up the comparison. However, this requires an * additional 16 bytes per element. So, for each 1M elements, the * method will require an additional 16 M bytes (approximately). */ public void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len, final int degree, DataOutput dataOut, final boolean fast_sort, final boolean columnarStorage) { try { columnar = columnarStorage; //TODO: the order of fields should be stable under Oracle JVM, but not guaranteed Field[] fields = stockObject.getClass().getDeclaredFields(); // Count number of elements in the given text int i_start = offset; final Text line = new Text(); while (i_start < offset + len) { int i_end = skipToEOL(element_bytes, i_start); // Extract the line without end of line character line.set(element_bytes, i_start, i_end - i_start - 1); stockObject.fromText(line); elementCount++; i_start = i_end; } LOG.info("Bulk loading an RTree with " + elementCount + " elements"); // It turns out the findBestDegree returns the best degree when the // whole // tree is loaded to memory when processed. However, as current // algorithms // process the tree while it's on disk, a higher degree should be // selected // such that a node fits one file block (assumed to be 4K). // final int degree = findBestDegree(bytesAvailable, elementCount); LOG.info("Writing an RTree with degree " + degree); int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree))); int leafNodeCount = (int) Math.pow(degree, height - 1); if (elementCount < 2 * leafNodeCount && height > 1) { height--; leafNodeCount = (int) Math.pow(degree, height - 1); } int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1)); int nonLeafNodeCount = nodeCount - leafNodeCount; // Keep track of the offset of each element in the text final int[] offsets = new int[elementCount]; final int[] ids = new int[elementCount]; final double[] ts = fast_sort ? new double[elementCount] : null; final double[] xs = fast_sort ? new double[elementCount] : null; final double[] ys = fast_sort ? new double[elementCount] : null; //initialize columnar data output ByteArrayOutputStream index_bos = new ByteArrayOutputStream(); DataOutputStream index_dos = new DataOutputStream(index_bos); ByteArrayOutputStream[] bos = new ByteArrayOutputStream[fields.length]; DataOutputStream[] dos = new DataOutputStream[fields.length]; for (int i = 0; i < bos.length; i++) { bos[i] = new ByteArrayOutputStream(); dos[i] = new DataOutputStream(bos[i]); } i_start = offset; line.clear(); for (int i = 0; i < elementCount; i++) { offsets[i] = i_start; ids[i] = i; int i_end = skipToEOL(element_bytes, i_start); if (xs != null) { // Extract the line with end of line character line.set(element_bytes, i_start, i_end - i_start - 1); stockObject.fromText(line); // Sample center of the shape ts[i] = (stockObject.getMBR().t1 + stockObject.getMBR().t2) / 2; xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2; ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2; //build columnar storage if (stockObject instanceof Point3d) { index_dos.writeDouble(ts[i]); index_dos.writeDouble(xs[i]); index_dos.writeDouble(ys[i]); } else { throw new RuntimeException("Indexing non-point shape with RTREE is not supported yet"); } for (int j = 0; j < fields.length; j++) { if (fields[j].getType().equals(Integer.TYPE)) { dos[j].writeInt(fields[j].getInt(stockObject)); } else if (fields[j].getType().equals(Double.TYPE)) { dos[j].writeDouble(fields[j].getDouble(stockObject)); } else if (fields[j].getType().equals(Long.TYPE)) { dos[j].writeLong(fields[j].getLong(stockObject)); } else { continue; //throw new RuntimeException("Field type is not supported yet"); } } } i_start = i_end; } index_dos.close(); for (int i = 0; i < dos.length; i++) { dos[i].close(); } /** A struct to store information about a split */ class SplitStruct extends Prism { /** Start and end index for this split */ int index1, index2; /** Direction of this split */ byte direction; /** Index of first element on disk */ int offsetOfFirstElement; static final byte DIRECTION_T = 0; static final byte DIRECTION_X = 1; static final byte DIRECTION_Y = 2; SplitStruct(int index1, int index2, byte direction) { this.index1 = index1; this.index2 = index2; this.direction = direction; } @Override public void write(DataOutput out) throws IOException { // if (columnarStorage) out.writeInt(index1); else out.writeInt(offsetOfFirstElement); super.write(out); } void partition(Queue<SplitStruct> toBePartitioned) { IndexedSortable sortableT; IndexedSortable sortableX; IndexedSortable sortableY; if (fast_sort) { // Use materialized xs[] and ys[] to do the comparisons sortableT = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap ts double tempt = ts[i]; ts[i] = ts[j]; ts[j] = tempt; // Swap xs double tempx = xs[i]; xs[i] = xs[j]; xs[j] = tempx; // Swap ys double tempY = ys[i]; ys[i] = ys[j]; ys[j] = tempY; // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; tempid = ids[i]; ids[i] = ids[j]; ids[j] = tempid; } @Override public int compare(int i, int j) { if (ts[i] < ts[j]) return -1; if (ts[i] > ts[j]) return 1; return 0; } }; sortableX = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap ts double tempt = ts[i]; ts[i] = ts[j]; ts[j] = tempt; // Swap xs double tempx = xs[i]; xs[i] = xs[j]; xs[j] = tempx; // Swap ys double tempY = ys[i]; ys[i] = ys[j]; ys[j] = tempY; // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; tempid = ids[i]; ids[i] = ids[j]; ids[j] = tempid; } @Override public int compare(int i, int j) { if (ts[i] < ts[j]) return -1; if (xs[i] < xs[j]) return -1; if (xs[i] > xs[j]) return 1; return 0; } }; sortableY = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap ts double tempt = ts[i]; ts[i] = ts[j]; ts[j] = tempt; // Swap xs double tempx = xs[i]; xs[i] = xs[j]; xs[j] = tempx; // Swap ys double tempY = ys[i]; ys[i] = ys[j]; ys[j] = tempY; // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; tempid = ids[i]; ids[i] = ids[j]; ids[j] = tempid; } @Override public int compare(int i, int j) { if (ys[i] < ys[j]) return -1; if (ys[i] > ys[j]) return 1; return 0; } }; } else { // No materialized xs and ys. Always deserialize objects // to compare sortableT = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; tempid = ids[i]; ids[i] = ids[j]; ids[j] = tempid; } @Override public int compare(int i, int j) { // Get end of line int eol = skipToEOL(element_bytes, offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); double ti = (stockObject.getMBR().t1 + stockObject.getMBR().t2) / 2; eol = skipToEOL(element_bytes, offsets[j]); line.set(element_bytes, offsets[j], eol - offsets[j] - 1); stockObject.fromText(line); double tj = (stockObject.getMBR().t1 + stockObject.getMBR().t2) / 2; if (ti < tj) return -1; if (ti > tj) return 1; return 0; } }; sortableX = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; tempid = ids[i]; ids[i] = ids[j]; ids[j] = tempid; } @Override public int compare(int i, int j) { // Get end of line int eol = skipToEOL(element_bytes, offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2; eol = skipToEOL(element_bytes, offsets[j]); line.set(element_bytes, offsets[j], eol - offsets[j] - 1); stockObject.fromText(line); double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2; if (xi < xj) return -1; if (xi > xj) return 1; return 0; } }; sortableY = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; tempid = ids[i]; ids[i] = ids[j]; ids[j] = tempid; } @Override public int compare(int i, int j) { int eol = skipToEOL(element_bytes, offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2; eol = skipToEOL(element_bytes, offsets[j]); line.set(element_bytes, offsets[j], eol - offsets[j] - 1); stockObject.fromText(line); double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2; if (yi < yj) return -1; if (yi > yj) return 1; return 0; } }; } final IndexedSorter sorter = new QuickSort(); final IndexedSortable[] sortables = new IndexedSortable[3]; sortables[SplitStruct.DIRECTION_T] = sortableT; sortables[SplitStruct.DIRECTION_X] = sortableX; sortables[SplitStruct.DIRECTION_Y] = sortableY; sorter.sort(sortables[direction], index1, index2); // Partition into maxEntries partitions (equally) and // create a SplitStruct for each partition int i1 = index1; for (int iSplit = 0; iSplit < degree; iSplit++) { int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree; SplitStruct newSplit; if (direction == 0) { newSplit = new SplitStruct(i1, i2, (byte) 1); } else if (direction == 1) { newSplit = new SplitStruct(i1, i2, (byte) 2); } else { newSplit = new SplitStruct(i1, i2, (byte) 0); } toBePartitioned.add(newSplit); i1 = i2; } } } // All nodes stored in level-order traversal Vector<SplitStruct> nodes = new Vector<SplitStruct>(); final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>(); toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X)); while (!toBePartitioned.isEmpty()) { SplitStruct split = toBePartitioned.poll(); if (nodes.size() < nonLeafNodeCount) { // This is a non-leaf split.partition(toBePartitioned); } nodes.add(split); } if (nodes.size() != nodeCount) { throw new RuntimeException( "Expected node count: " + nodeCount + ". Real node count: " + nodes.size()); } // Now we have our data sorted in the required order. Start building // the tree. // Store the offset of each leaf node in the tree FSDataOutputStream fakeOut = new FSDataOutputStream(new java.io.OutputStream() { // Null output stream @Override public void write(int b) throws IOException { // Do nothing } @Override public void write(byte[] b, int off, int len) throws IOException { // Do nothing } @Override public void write(byte[] b) throws IOException { // Do nothing } }, null, TreeHeaderSize + nodes.size() * NodeSize); for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) { nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos(); if (i != nodes.elementAt(i_leaf).index1) throw new RuntimeException(); double t1, x1, y1, t2, x2, y2; // Initialize MBR to first object int eol = skipToEOL(element_bytes, offsets[i]); fakeOut.write(element_bytes, offsets[i], eol - offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); Prism mbr = stockObject.getMBR(); t1 = mbr.t1; x1 = mbr.x1; y1 = mbr.y1; t2 = mbr.t2; x2 = mbr.x2; y2 = mbr.y2; i++; while (i < nodes.elementAt(i_leaf).index2) { eol = skipToEOL(element_bytes, offsets[i]); fakeOut.write(element_bytes, offsets[i], eol - offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); mbr = stockObject.getMBR(); if (mbr.t1 < t1) t1 = mbr.t1; if (mbr.x1 < x1) x1 = mbr.x1; if (mbr.y1 < y1) y1 = mbr.y1; if (mbr.t2 > t2) t2 = mbr.t2; if (mbr.x2 > x2) x2 = mbr.x2; if (mbr.y2 > y2) y2 = mbr.y2; i++; } nodes.elementAt(i_leaf).set(t1, x1, y1, t2, x2, y2); } fakeOut.close(); fakeOut = null; // Calculate MBR and offsetOfFirstElement for non-leaves for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) { int i_first_child = i_node * degree + 1; nodes.elementAt(i_node).offsetOfFirstElement = nodes.elementAt(i_first_child).offsetOfFirstElement; int i_child = 0; Prism mbr; mbr = nodes.elementAt(i_first_child + i_child); double t1 = mbr.t1; double x1 = mbr.x1; double y1 = mbr.y1; double t2 = mbr.t2; double x2 = mbr.x2; double y2 = mbr.y2; i_child++; while (i_child < degree) { mbr = nodes.elementAt(i_first_child + i_child); if (mbr.t1 < t1) t1 = mbr.t1; if (mbr.x1 < x1) x1 = mbr.x1; if (mbr.y1 < y1) y1 = mbr.y1; if (mbr.t2 > t2) t2 = mbr.t2; if (mbr.x2 > x2) x2 = mbr.x2; if (mbr.y2 > y2) y2 = mbr.y2; i_child++; } nodes.elementAt(i_node).set(t1, x1, y1, t2, x2, y2); } // Start writing the tree // write tree header (including size) // Total tree size. (== Total bytes written - 8 bytes for the size // itself) dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len); // Tree height dataOut.writeInt(height); // Degree dataOut.writeInt(degree); dataOut.writeInt(elementCount); //isColumnar dataOut.writeInt(columnarStorage ? 1 : 0); // write nodes for (SplitStruct node : nodes) { node.write(dataOut); } // write elements if (columnarStorage) { byte[] index_bs = index_bos.toByteArray(); byte[][] bss = new byte[bos.length][]; for (int i = 0; i < bss.length; i++) { bss[i] = bos[i].toByteArray(); } for (int element_i = 0; element_i < elementCount; element_i++) { //int eol = skipToEOL(element_bytes, offsets[element_i]); //dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]); dataOut.write(index_bs, ids[element_i] * IndexUnitSize, IndexUnitSize); } for (int i = 0; i < fields.length; i++) { int fieldSize = 0; if (fields[i].getType().equals(Integer.TYPE)) { fieldSize = 4; } else if (fields[i].getType().equals(Long.TYPE)) { fieldSize = 8; } else if (fields[i].getType().equals(Double.TYPE)) { fieldSize = 8; } else { //throw new RuntimeException("Unsupported field type: " + fields[i].getType().getName()); continue; } for (int element_i = 0; element_i < elementCount; element_i++) { //int eol = skipToEOL(element_bytes, offsets[element_i]); //dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]); dataOut.write(bss[i], ids[element_i] * fieldSize, fieldSize); } } } else { for (int element_i = 0; element_i < elementCount; element_i++) { int eol = skipToEOL(element_bytes, offsets[element_i]); dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]); } } } catch (IOException e) { e.printStackTrace(); } catch (IllegalArgumentException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IllegalAccessException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:com.ricemap.spateDB.mapred.SpatialRecordReader.java
License:Apache License
/** * Reads the next line from input and return true if a line was read. * If no more lines are available in this split, a false is returned. * @param value// w w w . j a v a2 s.co m * @return * @throws IOException */ protected boolean nextLine(Text value) throws IOException { if (blockType == BlockType.RTREE && pos == 8) { // File is positioned at the RTree header // Skip the header and go to first data object in file pos += RTree.skipHeader(in); LOG.info("Skipped R-tree to position: " + pos); // Reinitialize record reader at the new position lineReader = new LineReader(in); } while (getFilePosition() <= end) { value.clear(); int b = 0; if (buffer != null) { // Read the first line encountered in buffer int eol = RTree.skipToEOL(buffer, 0); b += eol; value.append(buffer, 0, eol); if (eol < buffer.length) { // There are still some bytes remaining in buffer byte[] tmp = new byte[buffer.length - eol]; System.arraycopy(buffer, eol, tmp, 0, tmp.length); } else { buffer = null; } // Check if a complete line has been read from the buffer byte last_byte = value.getBytes()[value.getLength() - 1]; if (last_byte == '\n' || last_byte == '\r') return true; } // Read the first line from stream Text temp = new Text(); b += lineReader.readLine(temp); if (b == 0) { // Indicates an end of stream return false; } pos += b; // Append the part read from stream to the part extracted from buffer value.append(temp.getBytes(), 0, temp.getLength()); if (value.getLength() > 1) { // Read a non-empty line. Note that end-of-line character is included return true; } } // Reached end of file return false; }