List of usage examples for org.apache.hadoop.io Text set
public void set(byte[] utf8, int start, int len)
From source file:edu.cmu.cs.in.hadoop.HoopWholeFileRecordReader.java
License:Open Source License
/** * /* w w w.ja v a2 s . c o m*/ */ @Override public boolean next(LongWritable key, Text value) throws IOException { debug("next ()"); byte[] buffer = new byte[(int) fileSize]; if (inStream == null) { debug("Internal error: no input stream available!"); return (false); } len = inStream.read(buffer, 0, (int) fileSize); //debug ("Actually read: " +len); if (len != -1) { value.set(buffer, 0, len); } else { return (false); } debug("Read the file"); return true; }
From source file:edu.cshl.schatz.jnomics.util.TextCutter.java
License:Open Source License
/** * Sets the contents of <code>text</code> to the value of the requested cut. * //w ww. j ava 2 s . c om * @param text The {@link Text} instance to reset. * @param cutIndex The 0-based index of the desired cut. * @return The passed {@link Text} instance text (not a copy). * @throws ArrayIndexOutOfBoundsException if cutIndex is greater than or * equal to the number of cuts. */ public Text getCut(Text text, int cutIndex) { if (modFlag) { reinitialize(); } if (cutIndex < 0) { cutIndex = cutCount + cutIndex; } if (cutIndex >= cutCount) { throw new ArrayIndexOutOfBoundsException( "Cut " + cutIndex + " does not exist (cutCount=" + cutCount + ")"); } int position = cutIndices[cutIndex][0]; int length = cutIndices[cutIndex][1]; text.set(sourceText.getBytes(), position, length); return text; }
From source file:edu.cshl.schatz.jnomics.util.TextCutter.java
License:Open Source License
/** * Sets the contents of <code>text</code> to the value of the requested cuts * (including any intermediate delimiters). * <p>//from w ww . j a v a 2s .c o m * Negative cut indices are interpreted as * <code>{@link #getCutCount()} + cutIndex</code>. For example, a * <code>cutIndex</code> of -1 would return the last cut. * <p> * If <code>lastIndex < firstIndex</code> (after converting negative indices * to their positive equivalents), then the resulting cut order is reversed. * For example, given the input "0 1 2 3 4", <code>getCutRange(4,2)</code> * and <code>getCutRange(-1,-3)</code> would both return "4 3 2". * * @param text The {@link Text} instance to reset. * @param firstIndex The 0-based index of the first desired cut in the range * (inclusive). * @param lastIndex The 0-based index of the last desired cut in the range * (inclusive). * @return The passed {@link Text} instance text (not a copy). * @throws ArrayIndexOutOfBoundsException if cutIndex is greater than or * equal to the number of cuts. */ public Text getCutRange(Text text, int firstIndex, int lastIndex) { if (modFlag) { reinitialize(); } if (firstIndex < 0) { firstIndex = cutCount + firstIndex; } if (lastIndex < 0) { lastIndex = cutCount + lastIndex; } if (lastIndex >= cutCount) { throw new ArrayIndexOutOfBoundsException("Requested cut does not exist (cutCount=" + cutCount + ")"); } int position, length; if (firstIndex <= lastIndex) { position = cutIndices[firstIndex][0]; length = lastIndex - firstIndex; for (int i = firstIndex; i <= lastIndex; i++) { length += cutIndices[i][1]; } text.set(sourceText.getBytes(), position, length); } else { final byte[] delimBytes = new byte[] { (byte) delimiterChar }; position = cutIndices[firstIndex][0]; length = cutIndices[firstIndex][1]; text.set(sourceText.getBytes(), position, length); for (int i = firstIndex - 1; i >= lastIndex; i--) { position = cutIndices[i][0]; length = cutIndices[i][1]; text.append(delimBytes, 0, 1); text.append(sourceText.getBytes(), position, length); } } return text; }
From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapRed.java
License:Apache License
/** * Split a line into key and value.// w w w. j ava 2 s . co m * @param line: a byte array of line containing UTF-8 bytes * @param key: key of a record * @param val: value of a record * @throws IOException */ void splitKeyVal(byte[] line, int length, Text key, Text val) throws IOException { int numKeyFields = getNumOfKeyFields(); byte[] separator = getFieldSeparator(); // Need to find numKeyFields separators int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator); for (int k = 1; k < numKeyFields && pos != -1; k++) { pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, length, separator); } try { if (pos == -1) { key.set(line, 0, length); val.set(""); } else { StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos, separator.length); } } catch (CharacterCodingException e) { LOG.warn(StringUtils.stringifyException(e)); } }
From source file:edu.umn.cs.spatialHadoop.core.CSVOGC.java
License:Open Source License
@Override public void fromText(Text text) { byte[] bytes = text.getBytes(); int separatorsEncountered = 0; int i1 = 0;/* w w w . jav a 2 s . co m*/ // Locate the required column while (separatorsEncountered < column && i1 < text.getLength()) { if (bytes[i1++] == separator) separatorsEncountered++; } if (i1 == text.getLength()) { this.prefix = new byte[i1]; System.arraycopy(bytes, 0, prefix, 0, i1); super.geom = null; this.suffix = null; return; } int i2 = i1 + 1; while (i2 < text.getLength() && bytes[i2] != separator) i2++; // Copy prefix and suffix if (i1 == 0) { prefix = null; } else { prefix = new byte[i1]; System.arraycopy(bytes, 0, prefix, 0, i1); } if (i2 == text.getLength()) { suffix = null; } else { suffix = new byte[text.getLength() - i2]; System.arraycopy(bytes, i2, suffix, 0, text.getLength() - i2); } // Chop prefix and suffix and leave only the selected column text.set(bytes, i1, i2 - i1); super.fromText(text); }
From source file:edu.umn.cs.spatialHadoop.core.GridInfo.java
License:Open Source License
@Override public void fromText(Text text) { super.fromText(text); if (text.getLength() > 0) { // Remove the first comma text.set(text.getBytes(), 1, text.getLength() - 1); columns = (int) TextSerializerHelper.consumeInt(text, ','); rows = (int) TextSerializerHelper.consumeInt(text, '\0'); }//w ww .j a v a2s . co m }
From source file:edu.umn.cs.spatialHadoop.core.Partition.java
License:Open Source License
@Override public void fromText(Text text) { super.fromText(text); text.set(text.getBytes(), 1, text.getLength() - 1); // Skip comma this.recordCount = TextSerializerHelper.consumeLong(text, ','); this.size = TextSerializerHelper.consumeLong(text, ','); filename = text.toString();/*w ww .j av a2s . c o m*/ }
From source file:edu.umn.cs.spatialHadoop.core.RTree.java
License:Open Source License
/** * Builds the RTree given a serialized list of elements. It uses the given * stockObject to deserialize these elements using * {@link TextSerializable#fromText(Text)} and build the tree. Also writes the * created tree to the disk directly./*from w ww . j a v a 2 s. com*/ * * @param element_bytes * - serialization of all elements separated by new lines * @param offset * - offset of the first byte to use in elements_bytes * @param len * - number of bytes to use in elements_bytes * @param degree * - Degree of the R-tree to build in terms of number of children per * node * @param dataOut * - output stream to write the result to. * @param fast_sort * - setting this to <code>true</code> allows the method to run * faster by materializing the offset of each element in the list * which speeds up the comparison. However, this requires an * additional 16 bytes per element. So, for each 1M elements, the * method will require an additional 16 M bytes (approximately). */ public void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len, final int degree, DataOutput dataOut, final boolean fast_sort) { try { // Count number of elements in the given text int i_start = offset; final Text line = new Text(); while (i_start < offset + len) { int i_end = skipToEOL(element_bytes, i_start); // Extract the line without end of line character line.set(element_bytes, i_start, i_end - i_start - 1); stockObject.fromText(line); elementCount++; i_start = i_end; } LOG.info("Bulk loading an RTree with " + elementCount + " elements"); // It turns out the findBestDegree returns the best degree when the whole // tree is loaded to memory when processed. However, as current algorithms // process the tree while it's on disk, a higher degree should be selected // such that a node fits one file block (assumed to be 4K). //final int degree = findBestDegree(bytesAvailable, elementCount); LOG.info("Writing an RTree with degree " + degree); int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree))); int leafNodeCount = (int) Math.pow(degree, height - 1); if (elementCount < 2 * leafNodeCount && height > 1) { height--; leafNodeCount = (int) Math.pow(degree, height - 1); } int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1)); int nonLeafNodeCount = nodeCount - leafNodeCount; // Keep track of the offset of each element in the text final int[] offsets = new int[elementCount]; final double[] xs = fast_sort ? new double[elementCount] : null; final double[] ys = fast_sort ? new double[elementCount] : null; i_start = offset; line.clear(); for (int i = 0; i < elementCount; i++) { offsets[i] = i_start; int i_end = skipToEOL(element_bytes, i_start); if (xs != null) { // Extract the line with end of line character line.set(element_bytes, i_start, i_end - i_start - 1); stockObject.fromText(line); // Sample center of the shape xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2; ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2; } i_start = i_end; } /**A struct to store information about a split*/ class SplitStruct extends Rectangle { /**Start and end index for this split*/ int index1, index2; /**Direction of this split*/ byte direction; /**Index of first element on disk*/ int offsetOfFirstElement; static final byte DIRECTION_X = 0; static final byte DIRECTION_Y = 1; SplitStruct(int index1, int index2, byte direction) { this.index1 = index1; this.index2 = index2; this.direction = direction; } @Override public void write(DataOutput out) throws IOException { out.writeInt(offsetOfFirstElement); super.write(out); } void partition(Queue<SplitStruct> toBePartitioned) { IndexedSortable sortableX; IndexedSortable sortableY; if (fast_sort) { // Use materialized xs[] and ys[] to do the comparisons sortableX = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap xs double tempx = xs[i]; xs[i] = xs[j]; xs[j] = tempx; // Swap ys double tempY = ys[i]; ys[i] = ys[j]; ys[j] = tempY; // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; } @Override public int compare(int i, int j) { if (xs[i] < xs[j]) return -1; if (xs[i] > xs[j]) return 1; return 0; } }; sortableY = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap xs double tempx = xs[i]; xs[i] = xs[j]; xs[j] = tempx; // Swap ys double tempY = ys[i]; ys[i] = ys[j]; ys[j] = tempY; // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; } @Override public int compare(int i, int j) { if (ys[i] < ys[j]) return -1; if (ys[i] > ys[j]) return 1; return 0; } }; } else { // No materialized xs and ys. Always deserialize objects to compare sortableX = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; } @Override public int compare(int i, int j) { // Get end of line int eol = skipToEOL(element_bytes, offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2; eol = skipToEOL(element_bytes, offsets[j]); line.set(element_bytes, offsets[j], eol - offsets[j] - 1); stockObject.fromText(line); double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2; if (xi < xj) return -1; if (xi > xj) return 1; return 0; } }; sortableY = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; } @Override public int compare(int i, int j) { int eol = skipToEOL(element_bytes, offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2; eol = skipToEOL(element_bytes, offsets[j]); line.set(element_bytes, offsets[j], eol - offsets[j] - 1); stockObject.fromText(line); double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2; if (yi < yj) return -1; if (yi > yj) return 1; return 0; } }; } final IndexedSorter sorter = new QuickSort(); final IndexedSortable[] sortables = new IndexedSortable[2]; sortables[SplitStruct.DIRECTION_X] = sortableX; sortables[SplitStruct.DIRECTION_Y] = sortableY; sorter.sort(sortables[direction], index1, index2); // Partition into maxEntries partitions (equally) and // create a SplitStruct for each partition int i1 = index1; for (int iSplit = 0; iSplit < degree; iSplit++) { int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree; SplitStruct newSplit = new SplitStruct(i1, i2, (byte) (1 - direction)); toBePartitioned.add(newSplit); i1 = i2; } } } // All nodes stored in level-order traversal Vector<SplitStruct> nodes = new Vector<SplitStruct>(); final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>(); toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X)); while (!toBePartitioned.isEmpty()) { SplitStruct split = toBePartitioned.poll(); if (nodes.size() < nonLeafNodeCount) { // This is a non-leaf split.partition(toBePartitioned); } nodes.add(split); } if (nodes.size() != nodeCount) { throw new RuntimeException( "Expected node count: " + nodeCount + ". Real node count: " + nodes.size()); } // Now we have our data sorted in the required order. Start building // the tree. // Store the offset of each leaf node in the tree FSDataOutputStream fakeOut = null; try { fakeOut = new FSDataOutputStream(new java.io.OutputStream() { // Null output stream @Override public void write(int b) throws IOException { // Do nothing } @Override public void write(byte[] b, int off, int len) throws IOException { // Do nothing } @Override public void write(byte[] b) throws IOException { // Do nothing } }, null, TreeHeaderSize + nodes.size() * NodeSize); for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) { nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos(); if (i != nodes.elementAt(i_leaf).index1) throw new RuntimeException(); double x1, y1, x2, y2; // Initialize MBR to first object int eol = skipToEOL(element_bytes, offsets[i]); fakeOut.write(element_bytes, offsets[i], eol - offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); Rectangle mbr = stockObject.getMBR(); x1 = mbr.x1; y1 = mbr.y1; x2 = mbr.x2; y2 = mbr.y2; i++; while (i < nodes.elementAt(i_leaf).index2) { eol = skipToEOL(element_bytes, offsets[i]); fakeOut.write(element_bytes, offsets[i], eol - offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); mbr = stockObject.getMBR(); if (mbr.x1 < x1) x1 = mbr.x1; if (mbr.y1 < y1) y1 = mbr.y1; if (mbr.x2 > x2) x2 = mbr.x2; if (mbr.y2 > y2) y2 = mbr.y2; i++; } nodes.elementAt(i_leaf).set(x1, y1, x2, y2); } } finally { if (fakeOut != null) fakeOut.close(); } // Calculate MBR and offsetOfFirstElement for non-leaves for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) { int i_first_child = i_node * degree + 1; nodes.elementAt(i_node).offsetOfFirstElement = nodes.elementAt(i_first_child).offsetOfFirstElement; int i_child = 0; Rectangle mbr; mbr = nodes.elementAt(i_first_child + i_child); double x1 = mbr.x1; double y1 = mbr.y1; double x2 = mbr.x2; double y2 = mbr.y2; i_child++; while (i_child < degree) { mbr = nodes.elementAt(i_first_child + i_child); if (mbr.x1 < x1) x1 = mbr.x1; if (mbr.y1 < y1) y1 = mbr.y1; if (mbr.x2 > x2) x2 = mbr.x2; if (mbr.y2 > y2) y2 = mbr.y2; i_child++; } nodes.elementAt(i_node).set(x1, y1, x2, y2); } // Start writing the tree // write tree header (including size) // Total tree size. (== Total bytes written - 8 bytes for the size itself) dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len); // Tree height dataOut.writeInt(height); // Degree dataOut.writeInt(degree); dataOut.writeInt(elementCount); // write nodes for (SplitStruct node : nodes) { node.write(dataOut); } // write elements for (int element_i = 0; element_i < elementCount; element_i++) { int eol = skipToEOL(element_bytes, offsets[element_i]); dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]); } } catch (IOException e) { e.printStackTrace(); } }
From source file:edu.umn.cs.spatialHadoop.indexing.RTree.java
License:Open Source License
/** * Builds the RTree given a serialized list of elements. It uses the given * stockObject to deserialize these elements using * {@link TextSerializable#fromText(Text)} and build the tree. Also writes the * created tree to the disk directly./* w ww . j ava2 s. c o m*/ * * @param element_bytes * - serialization of all elements separated by new lines * @param offset * - offset of the first byte to use in elements_bytes * @param len * - number of bytes to use in elements_bytes * @param degree * - Degree of the R-tree to build in terms of number of children per * node * @param dataOut * - output stream to write the result to. * @param fast_sort * - setting this to <code>true</code> allows the method to run * faster by materializing the offset of each element in the list * which speeds up the comparison. However, this requires an * additional 16 bytes per element. So, for each 1M elements, the * method will require an additional 16 M bytes (approximately). */ public static void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len, final int degree, DataOutput dataOut, final Shape stockObject, final boolean fast_sort) { try { int elementCount = 0; // Count number of elements in the given text int i_start = offset; final Text line = new Text(); while (i_start < offset + len) { int i_end = skipToEOL(element_bytes, i_start); // Extract the line without end of line character line.set(element_bytes, i_start, i_end - i_start - 1); stockObject.fromText(line); elementCount++; i_start = i_end; } LOG.info("Bulk loading an RTree with " + elementCount + " elements"); // It turns out the findBestDegree returns the best degree when the whole // tree is loaded to memory when processed. However, as current algorithms // process the tree while it's on disk, a higher degree should be selected // such that a node fits one file block (assumed to be 4K). //final int degree = findBestDegree(bytesAvailable, elementCount); int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree))); int leafNodeCount = (int) Math.pow(degree, height - 1); if (elementCount < 2 * leafNodeCount && height > 1) { height--; leafNodeCount = (int) Math.pow(degree, height - 1); } int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1)); int nonLeafNodeCount = nodeCount - leafNodeCount; // Keep track of the offset of each element in the text final int[] offsets = new int[elementCount]; final double[] xs = fast_sort ? new double[elementCount] : null; final double[] ys = fast_sort ? new double[elementCount] : null; i_start = offset; line.clear(); for (int i = 0; i < elementCount; i++) { offsets[i] = i_start; int i_end = skipToEOL(element_bytes, i_start); if (xs != null) { // Extract the line with end of line character line.set(element_bytes, i_start, i_end - i_start - 1); stockObject.fromText(line); // Sample center of the shape xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2; ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2; } i_start = i_end; } /**A struct to store information about a split*/ class SplitStruct extends Rectangle { /**Start and end index for this split*/ int index1, index2; /**Direction of this split*/ byte direction; /**Index of first element on disk*/ int offsetOfFirstElement; static final byte DIRECTION_X = 0; static final byte DIRECTION_Y = 1; SplitStruct(int index1, int index2, byte direction) { this.index1 = index1; this.index2 = index2; this.direction = direction; } @Override public void write(DataOutput out) throws IOException { out.writeInt(offsetOfFirstElement); super.write(out); } void partition(Queue<SplitStruct> toBePartitioned) { IndexedSortable sortableX; IndexedSortable sortableY; if (fast_sort) { // Use materialized xs[] and ys[] to do the comparisons sortableX = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap xs double tempx = xs[i]; xs[i] = xs[j]; xs[j] = tempx; // Swap ys double tempY = ys[i]; ys[i] = ys[j]; ys[j] = tempY; // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; } @Override public int compare(int i, int j) { if (xs[i] < xs[j]) return -1; if (xs[i] > xs[j]) return 1; return 0; } }; sortableY = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap xs double tempx = xs[i]; xs[i] = xs[j]; xs[j] = tempx; // Swap ys double tempY = ys[i]; ys[i] = ys[j]; ys[j] = tempY; // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; } @Override public int compare(int i, int j) { if (ys[i] < ys[j]) return -1; if (ys[i] > ys[j]) return 1; return 0; } }; } else { // No materialized xs and ys. Always deserialize objects to compare sortableX = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; } @Override public int compare(int i, int j) { // Get end of line int eol = skipToEOL(element_bytes, offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2; eol = skipToEOL(element_bytes, offsets[j]); line.set(element_bytes, offsets[j], eol - offsets[j] - 1); stockObject.fromText(line); double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2; if (xi < xj) return -1; if (xi > xj) return 1; return 0; } }; sortableY = new IndexedSortable() { @Override public void swap(int i, int j) { // Swap id int tempid = offsets[i]; offsets[i] = offsets[j]; offsets[j] = tempid; } @Override public int compare(int i, int j) { int eol = skipToEOL(element_bytes, offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2; eol = skipToEOL(element_bytes, offsets[j]); line.set(element_bytes, offsets[j], eol - offsets[j] - 1); stockObject.fromText(line); double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2; if (yi < yj) return -1; if (yi > yj) return 1; return 0; } }; } final IndexedSorter sorter = new QuickSort(); final IndexedSortable[] sortables = new IndexedSortable[2]; sortables[SplitStruct.DIRECTION_X] = sortableX; sortables[SplitStruct.DIRECTION_Y] = sortableY; sorter.sort(sortables[direction], index1, index2); // Partition into maxEntries partitions (equally) and // create a SplitStruct for each partition int i1 = index1; for (int iSplit = 0; iSplit < degree; iSplit++) { int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree; SplitStruct newSplit = new SplitStruct(i1, i2, (byte) (1 - direction)); toBePartitioned.add(newSplit); i1 = i2; } } } // All nodes stored in level-order traversal Vector<SplitStruct> nodes = new Vector<SplitStruct>(); final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>(); toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X)); while (!toBePartitioned.isEmpty()) { SplitStruct split = toBePartitioned.poll(); if (nodes.size() < nonLeafNodeCount) { // This is a non-leaf split.partition(toBePartitioned); } nodes.add(split); } if (nodes.size() != nodeCount) { throw new RuntimeException( "Expected node count: " + nodeCount + ". Real node count: " + nodes.size()); } // Now we have our data sorted in the required order. Start building // the tree. // Store the offset of each leaf node in the tree FSDataOutputStream fakeOut = null; try { fakeOut = new FSDataOutputStream(new java.io.OutputStream() { // Null output stream @Override public void write(int b) throws IOException { // Do nothing } @Override public void write(byte[] b, int off, int len) throws IOException { // Do nothing } @Override public void write(byte[] b) throws IOException { // Do nothing } }, null, TreeHeaderSize + nodes.size() * NodeSize); for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) { nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos(); if (i != nodes.elementAt(i_leaf).index1) throw new RuntimeException(); double x1, y1, x2, y2; // Initialize MBR to first object int eol = skipToEOL(element_bytes, offsets[i]); fakeOut.write(element_bytes, offsets[i], eol - offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); Rectangle mbr = stockObject.getMBR(); x1 = mbr.x1; y1 = mbr.y1; x2 = mbr.x2; y2 = mbr.y2; i++; while (i < nodes.elementAt(i_leaf).index2) { eol = skipToEOL(element_bytes, offsets[i]); fakeOut.write(element_bytes, offsets[i], eol - offsets[i]); line.set(element_bytes, offsets[i], eol - offsets[i] - 1); stockObject.fromText(line); mbr = stockObject.getMBR(); if (mbr.x1 < x1) x1 = mbr.x1; if (mbr.y1 < y1) y1 = mbr.y1; if (mbr.x2 > x2) x2 = mbr.x2; if (mbr.y2 > y2) y2 = mbr.y2; i++; } nodes.elementAt(i_leaf).set(x1, y1, x2, y2); } } finally { if (fakeOut != null) fakeOut.close(); } // Calculate MBR and offsetOfFirstElement for non-leaves for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) { int i_first_child = i_node * degree + 1; nodes.elementAt(i_node).offsetOfFirstElement = nodes.elementAt(i_first_child).offsetOfFirstElement; int i_child = 0; Rectangle mbr; mbr = nodes.elementAt(i_first_child + i_child); double x1 = mbr.x1; double y1 = mbr.y1; double x2 = mbr.x2; double y2 = mbr.y2; i_child++; while (i_child < degree) { mbr = nodes.elementAt(i_first_child + i_child); if (mbr.x1 < x1) x1 = mbr.x1; if (mbr.y1 < y1) y1 = mbr.y1; if (mbr.x2 > x2) x2 = mbr.x2; if (mbr.y2 > y2) y2 = mbr.y2; i_child++; } nodes.elementAt(i_node).set(x1, y1, x2, y2); } // Start writing the tree // write tree header (including size) // Total tree size. (== Total bytes written - 8 bytes for the size itself) dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len); // Tree height dataOut.writeInt(height); // Degree dataOut.writeInt(degree); dataOut.writeInt(elementCount); // write nodes for (SplitStruct node : nodes) { node.write(dataOut); } // write elements for (int element_i = 0; element_i < elementCount; element_i++) { int eol = skipToEOL(element_bytes, offsets[element_i]); dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]); } } catch (IOException e) { e.printStackTrace(); } }
From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java
License:Open Source License
/** * Deserializes and consumes a double from the given text. Consuming means all * characters read for deserialization are removed from the given text. * If separator is non-zero, a double is read and consumed up to the first * occurrence of this separator. The separator is also consumed. * @param text/* www. jav a2s . co m*/ * @param separator * @return */ public static double consumeDouble(Text text, char separator) { int i = 0; byte[] bytes = text.getBytes(); // Skip until the separator or end of text while (i < text.getLength() && ((bytes[i] >= '0' && bytes[i] <= '9') || bytes[i] == 'e' || bytes[i] == 'E' || bytes[i] == '-' || bytes[i] == '+' || bytes[i] == '.')) i++; double d = Double.parseDouble(new String(bytes, 0, i)); if (i < text.getLength() && bytes[i] == separator) i++; System.arraycopy(bytes, i, bytes, 0, text.getLength() - i); text.set(bytes, 0, text.getLength() - i); return d; }