List of usage examples for the clear() method of org.apache.hadoop.io.Text
public void clear()
From source file:edu.isi.mavuno.util.TokenWritable.java
License:Apache License
/**
 * Copies {@code s} into {@code t}, treating a {@code null} string as "empty".
 *
 * @param t the Text to overwrite
 * @param s the new value; when {@code null}, {@code t} is cleared instead of set
 */
protected static void safeSet(Text t, String s) {
    if (s != null) {
        t.set(s);
        return;
    }
    // A null string is represented by clearing the target.
    t.clear();
}
From source file:edu.stanford.pigir.warc.WarcRecord.java
License:Open Source License
/** * The actual heavy lifting of reading wbRecordReader the next WARC record. The * readContent parameter is used to support cases when the original * Pig query project out the content. We save time if we don't need * that content./*from w w w .j a v a2s .co m*/ * * @param warcLineReader a line reader * @param readContent indicate whether the content of the record is needed, as opposed to just the WARC header info. * @return the content bytes (w/ the headerBuffer populated) * @throws java.io.IOException */ private static byte[] readNextRecord(LineAndChunkReader warcLineReader, boolean readContent) throws IOException { if (warcLineReader == null) { return null; } Text txtBuf = new Text(); byte[] retContent = null; tmpOptionalHeaderKeys.clear(); tmpGrandTotalBytesRead = 0L; tmpHeaderMap.clear(); // Find our WARC header boolean foundWARCHeader = scanToRecordStart(warcLineReader, txtBuf); txtBuf.clear(); // No WARC header found? if (!foundWARCHeader) { return null; } // Read the header (up to the first empty line). // Make sure we get the (mandatory) content length // is wbRecordReader the header, because we rely on it below. // We do not check for the other mandatory header fields: int contentLength = pullHeaderFromStream(warcLineReader, txtBuf); txtBuf.clear(); if (contentLength < 0) { return null; } if (readContent) { // Pull the bytes of the content from the stream: retContent = new byte[contentLength]; Integer totalRead = pullContent(warcLineReader, retContent, contentLength); if (totalRead == null) throw new IOException("Could not read content from WARC record ID: " + tmpHeaderMap.get(WARC_RECORD_ID) + " of supposed content length " + tmpHeaderMap.get(CONTENT_LENGTH) + ". Reason is other than EOF."); if (totalRead < contentLength) { // Did we hit EOF wbRecordReader the middle of the WARC record's content? 
throw new IOException("Hit end of file while reading content of WARC record ID: " + tmpHeaderMap.get(WARC_RECORD_ID) + " of supposed content length " + tmpHeaderMap.get(CONTENT_LENGTH) + "."); } tmpGrandTotalBytesRead += totalRead; return retContent; } else { return new byte[0]; } }
From source file:edu.stanford.pigir.warc.WarcRecord.java
License:Open Source License
/** * @param warcLineReader//from w ww . jav a2 s .c o m * @param txtBuf * @param inHeader * @return * @throws IOException */ private static int pullHeaderFromStream(LineAndChunkReader warcLineReader, Text txtBuf) throws IOException { boolean inHeader = true; String line; int bytesRead; int contentLength = -1; String headerAttrName; String headerAttrValue; txtBuf.clear(); while (inHeader && ((bytesRead = warcLineReader.readLine(txtBuf)) != 0)) { line = txtBuf.toString(); tmpGrandTotalBytesRead += bytesRead; if (line.trim().length() == 0) { inHeader = false; } else { String[] thisHeaderPieceParts = line.split(":", 2); if (thisHeaderPieceParts.length == 2) { headerAttrName = (thisHeaderPieceParts[0]).trim().toLowerCase(); headerAttrValue = thisHeaderPieceParts[1].trim(); tmpHeaderMap.put(headerAttrName, headerAttrValue); // Accumulate a list of optional header keys: if (mandatoryHeaderFieldsLookup.get(headerAttrName) == null) tmpOptionalHeaderKeys.add(headerAttrName); if (headerAttrName.startsWith(CONTENT_LENGTH)) { try { contentLength = Integer.parseInt(headerAttrValue.trim()); } catch (NumberFormatException nfEx) { contentLength = -1; } } } } txtBuf.clear(); } return contentLength; }
From source file:edu.stanford.pigir.warc.WarcRecord.java
License:Open Source License
/**
 * Advances the reader line by line until a line starting with one of the
 * accepted {@code WARC_VERSIONS} prefixes has been consumed.
 *
 * <p>Every byte read is added to {@code tmpGrandTotalBytesRead}, and
 * {@code txtBuf} is cleared after each line, including the matching one.
 *
 * @param warcLineReader reader to scan
 * @param txtBuf scratch buffer used for each line
 * @return {@code true} if a record start was found, {@code false} if the
 *         reader returned 0 bytes before one was seen
 * @throws IOException if reading from the underlying stream fails
 */
private static boolean scanToRecordStart(LineAndChunkReader warcLineReader, Text txtBuf) throws IOException {
    boolean foundMark = false;
    while (!foundMark) {
        final int bytesRead = warcLineReader.readLine(txtBuf);
        if (bytesRead == 0) {
            break;
        }
        tmpGrandTotalBytesRead += bytesRead;

        final String line = txtBuf.toString();
        for (String acceptableWarcVersion : WARC_VERSIONS) {
            if (line.startsWith(acceptableWarcVersion)) {
                foundMark = true;
                break;
            }
        }
        txtBuf.clear();
    }
    return foundMark;
}
From source file:edu.stolaf.cs.wmrserver.streaming.StreamKeyValUtil.java
License:Apache License
/**
 * Reads one line from {@code lineReader} into {@code out}, discarding
 * whatever {@code out} held before.
 *
 * @param lineReader LineReader to read the line from
 * @param out Text that receives the line; cleared before reading
 * @return the value returned by {@code lineReader.readLine(out)} (number of bytes read)
 * @throws IOException if the underlying read fails
 */
public static int readLine(LineReader lineReader, Text out) throws IOException {
    out.clear();
    final int bytesRead = lineReader.readLine(out);
    return bytesRead;
}
From source file:edu.umn.cs.spatialHadoop.core.RTree.java
License:Open Source License
/**
 * Builds the RTree given a serialized list of elements. It uses the
 * stockObject to deserialize these elements using
 * {@link TextSerializable#fromText(Text)} and build the tree. Also writes the
 * created tree to the given output directly.
 *
 * <p>Phases, in order: (1) count the elements; (2) derive height and node
 * counts from the element count and degree; (3) record the byte offset of
 * each element (and its centre when {@code fast_sort} is set); (4) sort and
 * partition the offsets breadth-first, alternating between X and Y, to
 * produce the nodes in level order; (5) compute each leaf's MBR and data
 * offset, then each inner node's MBR from its children; (6) write the
 * header, the nodes and finally the elements in their sorted order.
 *
 * <p>An {@link IOException} raised while writing is caught here and only
 * printed via {@code printStackTrace()}; it is not propagated to the caller.
 *
 * <p>NOTE(review): {@code elementCount} is not declared in this method
 * (presumably an instance field) and is incremented without being reset
 * here - confirm callers only invoke this on a fresh tree.
 *
 * @param element_bytes
 *          - serialization of all elements separated by new lines
 * @param offset
 *          - offset of the first byte to use in elements_bytes
 * @param len
 *          - number of bytes to use in elements_bytes
 * @param degree
 *          - Degree of the R-tree to build in terms of number of children per
 *          node
 * @param dataOut
 *          - output stream to write the result to.
 * @param fast_sort
 *          - setting this to <code>true</code> allows the method to run
 *          faster by materializing the centre coordinates (two doubles) of
 *          each element, which speeds up the comparison. However, this
 *          requires an additional 16 bytes per element. So, for each 1M
 *          elements, the method will require an additional 16 M bytes
 *          (approximately).
 */
public void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len, final int degree,
    DataOutput dataOut, final boolean fast_sort) {
  try {
    // Count number of elements in the given text
    int i_start = offset;
    final Text line = new Text();
    while (i_start < offset + len) {
      int i_end = skipToEOL(element_bytes, i_start);
      // Extract the line without end of line character
      // (assumes skipToEOL returns the offset just past the EOL byte - TODO confirm)
      line.set(element_bytes, i_start, i_end - i_start - 1);
      stockObject.fromText(line);
      elementCount++;
      i_start = i_end;
    }
    LOG.info("Bulk loading an RTree with " + elementCount + " elements");

    // It turns out the findBestDegree returns the best degree when the whole
    // tree is loaded to memory when processed. However, as current algorithms
    // process the tree while it's on disk, a higher degree should be selected
    // such that a node fits one file block (assumed to be 4K).
    //final int degree = findBestDegree(bytesAvailable, elementCount);
    LOG.info("Writing an RTree with degree " + degree);

    // Height of a complete tree of the given degree, at least 1.
    // NOTE(review): degree == 1 makes Math.log(degree) zero and (degree - 1)
    // zero below; callers presumably always pass degree >= 2 - confirm.
    int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree)));
    int leafNodeCount = (int) Math.pow(degree, height - 1);
    // Use a shallower tree when leaves would hold fewer than two elements on average
    if (elementCount < 2 * leafNodeCount && height > 1) {
      height--;
      leafNodeCount = (int) Math.pow(degree, height - 1);
    }
    // Number of nodes in a complete tree: (degree^height - 1) / (degree - 1)
    int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1));
    int nonLeafNodeCount = nodeCount - leafNodeCount;

    // Keep track of the offset of each element in the text
    final int[] offsets = new int[elementCount];
    final double[] xs = fast_sort ? new double[elementCount] : null;
    final double[] ys = fast_sort ? new double[elementCount] : null;

    i_start = offset;
    line.clear();
    for (int i = 0; i < elementCount; i++) {
      offsets[i] = i_start;
      int i_end = skipToEOL(element_bytes, i_start);
      if (xs != null) {
        // Extract the line without end of line character (same call as in
        // the counting loop above)
        line.set(element_bytes, i_start, i_end - i_start - 1);
        stockObject.fromText(line);
        // Sample center of the shape
        xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
        ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
      }
      i_start = i_end;
    }

    /**A struct to store information about a split*/
    class SplitStruct extends Rectangle {
      /**Start and end index for this split*/
      int index1, index2;
      /**Direction of this split*/
      byte direction;
      /**Index of first element on disk*/
      int offsetOfFirstElement;

      static final byte DIRECTION_X = 0;
      static final byte DIRECTION_Y = 1;

      SplitStruct(int index1, int index2, byte direction) {
        this.index1 = index1;
        this.index2 = index2;
        this.direction = direction;
      }

      /** Writes the element offset followed by the rectangle fields written by the superclass. */
      @Override
      public void write(DataOutput out) throws IOException {
        out.writeInt(offsetOfFirstElement);
        super.write(out);
      }

      /**
       * Sorts the elements between index1 and index2 along this split's
       * direction, then enqueues {@code degree} child splits that use the
       * other direction.
       */
      void partition(Queue<SplitStruct> toBePartitioned) {
        IndexedSortable sortableX;
        IndexedSortable sortableY;

        if (fast_sort) {
          // Use materialized xs[] and ys[] to do the comparisons
          sortableX = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap xs
              double tempx = xs[i];
              xs[i] = xs[j];
              xs[j] = tempx;
              // Swap ys
              double tempY = ys[i];
              ys[i] = ys[j];
              ys[j] = tempY;
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              if (xs[i] < xs[j])
                return -1;
              if (xs[i] > xs[j])
                return 1;
              return 0;
            }
          };

          sortableY = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap xs
              double tempx = xs[i];
              xs[i] = xs[j];
              xs[j] = tempx;
              // Swap ys
              double tempY = ys[i];
              ys[i] = ys[j];
              ys[j] = tempY;
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              if (ys[i] < ys[j])
                return -1;
              if (ys[i] > ys[j])
                return 1;
              return 0;
            }
          };
        } else {
          // No materialized xs and ys. Always deserialize objects to compare
          // (both comparators reuse the shared `line` buffer and stockObject)
          sortableX = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              // Get end of line
              int eol = skipToEOL(element_bytes, offsets[i]);
              line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
              stockObject.fromText(line);
              double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;

              eol = skipToEOL(element_bytes, offsets[j]);
              line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
              stockObject.fromText(line);
              double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
              if (xi < xj)
                return -1;
              if (xi > xj)
                return 1;
              return 0;
            }
          };

          sortableY = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              int eol = skipToEOL(element_bytes, offsets[i]);
              line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
              stockObject.fromText(line);
              double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;

              eol = skipToEOL(element_bytes, offsets[j]);
              line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
              stockObject.fromText(line);
              double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
              if (yi < yj)
                return -1;
              if (yi > yj)
                return 1;
              return 0;
            }
          };
        }

        final IndexedSorter sorter = new QuickSort();

        final IndexedSortable[] sortables = new IndexedSortable[2];
        sortables[SplitStruct.DIRECTION_X] = sortableX;
        sortables[SplitStruct.DIRECTION_Y] = sortableY;

        sorter.sort(sortables[direction], index1, index2);

        // Partition into maxEntries partitions (equally) and
        // create a SplitStruct for each partition
        int i1 = index1;
        for (int iSplit = 0; iSplit < degree; iSplit++) {
          int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree;
          // Children are split along the other axis: (1 - direction) flips 0 <-> 1
          SplitStruct newSplit = new SplitStruct(i1, i2, (byte) (1 - direction));
          toBePartitioned.add(newSplit);
          i1 = i2;
        }
      }
    }

    // All nodes stored in level-order traversal
    Vector<SplitStruct> nodes = new Vector<SplitStruct>();
    final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>();
    toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X));

    // Breadth-first expansion: only the first nonLeafNodeCount splits are
    // partitioned further; the rest become leaves.
    while (!toBePartitioned.isEmpty()) {
      SplitStruct split = toBePartitioned.poll();
      if (nodes.size() < nonLeafNodeCount) {
        // This is a non-leaf
        split.partition(toBePartitioned);
      }
      nodes.add(split);
    }

    if (nodes.size() != nodeCount) {
      throw new RuntimeException(
          "Expected node count: " + nodeCount + ". Real node count: " + nodes.size());
    }

    // Now we have our data sorted in the required order. Start building
    // the tree.
    // Store the offset of each leaf node in the tree
    // The stream below discards all bytes; it is only used for getPos(), to
    // learn where each leaf's first element would land. The third
    // constructor argument is presumably the starting position (header plus
    // all nodes) - TODO confirm against FSDataOutputStream.
    FSDataOutputStream fakeOut = null;
    try {
      fakeOut = new FSDataOutputStream(new java.io.OutputStream() {
        // Null output stream
        @Override
        public void write(int b) throws IOException {
          // Do nothing
        }

        @Override
        public void write(byte[] b, int off, int len) throws IOException {
          // Do nothing
        }

        @Override
        public void write(byte[] b) throws IOException {
          // Do nothing
        }
      }, null, TreeHeaderSize + nodes.size() * NodeSize);
      for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) {
        nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos();
        // Sanity check: leaves must cover the elements contiguously, in order
        if (i != nodes.elementAt(i_leaf).index1)
          throw new RuntimeException();
        double x1, y1, x2, y2;

        // Initialize MBR to first object
        int eol = skipToEOL(element_bytes, offsets[i]);
        fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
        line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
        stockObject.fromText(line);
        Rectangle mbr = stockObject.getMBR();
        x1 = mbr.x1;
        y1 = mbr.y1;
        x2 = mbr.x2;
        y2 = mbr.y2;
        i++;

        // Expand the MBR with the remaining objects of this leaf
        while (i < nodes.elementAt(i_leaf).index2) {
          eol = skipToEOL(element_bytes, offsets[i]);
          fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
          line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
          stockObject.fromText(line);
          mbr = stockObject.getMBR();
          if (mbr.x1 < x1)
            x1 = mbr.x1;
          if (mbr.y1 < y1)
            y1 = mbr.y1;
          if (mbr.x2 > x2)
            x2 = mbr.x2;
          if (mbr.y2 > y2)
            y2 = mbr.y2;
          i++;
        }
        nodes.elementAt(i_leaf).set(x1, y1, x2, y2);
      }

    } finally {
      if (fakeOut != null)
        fakeOut.close();
    }

    // Calculate MBR and offsetOfFirstElement for non-leaves
    // (iterating backwards so children are finished before their parent)
    for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) {
      // In level order, the children of node k are k * degree + 1 .. k * degree + degree
      int i_first_child = i_node * degree + 1;
      nodes.elementAt(i_node).offsetOfFirstElement = nodes.elementAt(i_first_child).offsetOfFirstElement;
      int i_child = 0;
      Rectangle mbr;
      mbr = nodes.elementAt(i_first_child + i_child);
      double x1 = mbr.x1;
      double y1 = mbr.y1;
      double x2 = mbr.x2;
      double y2 = mbr.y2;
      i_child++;

      while (i_child < degree) {
        mbr = nodes.elementAt(i_first_child + i_child);
        if (mbr.x1 < x1)
          x1 = mbr.x1;
        if (mbr.y1 < y1)
          y1 = mbr.y1;
        if (mbr.x2 > x2)
          x2 = mbr.x2;
        if (mbr.y2 > y2)
          y2 = mbr.y2;
        i_child++;
      }
      nodes.elementAt(i_node).set(x1, y1, x2, y2);
    }

    // Start writing the tree
    // write tree header (including size)
    // Total tree size. (== Total bytes written - 8 bytes for the size itself)
    dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len);
    // Tree height
    dataOut.writeInt(height);
    // Degree
    dataOut.writeInt(degree);
    // Number of elements
    dataOut.writeInt(elementCount);

    // write nodes
    for (SplitStruct node : nodes) {
      node.write(dataOut);
    }
    // write elements (in sorted order, each including its end-of-line bytes)
    for (int element_i = 0; element_i < elementCount; element_i++) {
      int eol = skipToEOL(element_bytes, offsets[element_i]);
      dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
    }

  } catch (IOException e) {
    // NOTE(review): the failure is only printed; the caller cannot tell that
    // the tree may have been written partially.
    e.printStackTrace();
  }
}
From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java
License:Open Source License
/** * Compute the Deluanay triangulation in the local machine * @param inPaths/*w w w . j ava 2 s . c o m*/ * @param outPath * @param params * @throws IOException * @throws InterruptedException */ public static void delaunayLocal(Path[] inPaths, Path outPath, final OperationsParams params) throws IOException, InterruptedException { if (params.getBoolean("mem", false)) MemoryReporter.startReporting(); // 1- Split the input path/file to get splits that can be processed // independently final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>(); Job job = Job.getInstance(params); SpatialInputFormat3.setInputPaths(job, inPaths); final List<InputSplit> splits = inputFormat.getSplits(job); final Point[][] allLists = new Point[splits.size()][]; // 2- Read all input points in memory LOG.info("Reading points from " + splits.size() + " splits"); List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() { @Override public Integer run(int i1, int i2) { try { int numPoints = 0; for (int i = i1; i < i2; i++) { List<Point> points = new ArrayList<Point>(); FileSplit fsplit = (FileSplit) splits.get(i); final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat .createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(fsplit, params); } else { throw new RuntimeException("Unknown record reader"); } while (reader.nextKeyValue()) { Iterable<Point> pts = reader.getCurrentValue(); for (Point p : pts) { points.add(p.clone()); } } reader.close(); numPoints += points.size(); allLists[i] = points.toArray(new Point[points.size()]); } return numPoints; } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { 
e.printStackTrace(); } return null; } }, params.getInt("parallel", Runtime.getRuntime().availableProcessors())); int totalNumPoints = 0; for (int numPoints : numsPoints) totalNumPoints += numPoints; LOG.info("Read " + totalNumPoints + " points and merging into one list"); Point[] allPoints = new Point[totalNumPoints]; int pointer = 0; for (int iList = 0; iList < allLists.length; iList++) { System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length); pointer += allLists[iList].length; allLists[iList] = null; // To let the GC collect it } if (params.getBoolean("dedup", true)) { float threshold = params.getFloat("threshold", 1E-5f); allPoints = SpatialAlgorithms.deduplicatePoints(allPoints, threshold); } LOG.info("Computing DT for " + allPoints.length + " points"); GSDTAlgorithm dtAlgorithm = new GSImprovedAlgorithm(allPoints, null); LOG.info("DT computed"); Rectangle mbr = FileMBR.fileMBR(inPaths, params); double buffer = Math.max(mbr.getWidth(), mbr.getHeight()) / 10; Rectangle bigMBR = mbr.buffer(buffer, buffer); if (outPath != null && params.getBoolean("output", true)) { LOG.info("Writing the output as a soup of triangles"); Triangulation answer = dtAlgorithm.getFinalTriangulation(); FileSystem outFS = outPath.getFileSystem(params); PrintStream out = new PrintStream(outFS.create(outPath)); Text text = new Text2(); byte[] tab = "\t".getBytes(); for (Point[] triangle : answer.iterateTriangles()) { text.clear(); triangle[0].toText(text); text.append(tab, 0, tab.length); triangle[1].toText(text); text.append(tab, 0, tab.length); triangle[2].toText(text); out.println(text); } out.close(); } // dtAlgorithm.getFinalTriangulation().draw(); //Triangulation finalPart = new Triangulation(); //Triangulation nonfinalPart = new Triangulation(); //dtAlgorithm.splitIntoFinalAndNonFinalParts(new Rectangle(-180, -90, 180, 90), finalPart, nonfinalPart); }
From source file:edu.umn.cs.spatialHadoop.delaunay.Triangulation.java
License:Open Source License
/**
 * Draws this triangulation as SVG Rasem commands.
 *
 * <p>Two groups are emitted: one {@code circle} per site, annotated with the
 * site's text form, followed by one {@code line} per edge. An edge is
 * printed only when its start index is smaller than its end index
 * (presumably because each edge is stored in both directions - TODO confirm).
 *
 * <p>All output goes to {@code out}. Previously the parameter was ignored
 * and everything was printed to {@code System.out}.
 *
 * @param out stream that receives the drawing commands
 * @param mbr rectangle whose lower corner ({@code x1}, {@code y1}) is used as the origin
 * @param scale factor applied to every coordinate after shifting by the origin
 */
public void draw(PrintStream out, Rectangle mbr, double scale) {
  out.println("group {");
  Text text = new Text();
  for (Point s : sites) {
    // Reuse one buffer for the textual form of every site
    text.clear();
    out.printf("circle %f, %f, 0.5 # %s\n", (s.x - mbr.x1) * scale, (s.y - mbr.y1) * scale,
        s.toText(text).toString());
  }
  out.println("}");
  out.println("group {");
  for (int i = 0; i < edgeStarts.length; i++) {
    if (edgeStarts[i] < edgeEnds[i])
      out.printf("line %f, %f, %f, %f\n", (sites[edgeStarts[i]].x - mbr.x1) * scale,
          (sites[edgeStarts[i]].y - mbr.y1) * scale, (sites[edgeEnds[i]].x - mbr.x1) * scale,
          (sites[edgeEnds[i]].y - mbr.y1) * scale);
  }
  out.println("}");
}
From source file:edu.umn.cs.spatialHadoop.indexing.RTree.java
License:Open Source License
/**
 * Builds the RTree given a serialized list of elements. It uses the given
 * stockObject to deserialize these elements using
 * {@link TextSerializable#fromText(Text)} and build the tree. Also writes the
 * created tree to the given output directly.
 *
 * <p>Phases, in order: (1) count the elements; (2) derive height and node
 * counts from the element count and degree; (3) record the byte offset of
 * each element (and its centre when {@code fast_sort} is set); (4) sort and
 * partition the offsets breadth-first, alternating between X and Y, to
 * produce the nodes in level order; (5) compute each leaf's MBR and data
 * offset, then each inner node's MBR from its children; (6) write the
 * header, the nodes and finally the elements in their sorted order.
 *
 * <p>An {@link IOException} raised while writing is caught here and only
 * printed via {@code printStackTrace()}; it is not propagated to the caller.
 *
 * @param element_bytes
 *          - serialization of all elements separated by new lines
 * @param offset
 *          - offset of the first byte to use in elements_bytes
 * @param len
 *          - number of bytes to use in elements_bytes
 * @param degree
 *          - Degree of the R-tree to build in terms of number of children per
 *          node
 * @param dataOut
 *          - output stream to write the result to.
 * @param stockObject
 *          - reusable shape that every element is deserialized into; it is
 *          overwritten repeatedly by this method
 * @param fast_sort
 *          - setting this to <code>true</code> allows the method to run
 *          faster by materializing the centre coordinates (two doubles) of
 *          each element, which speeds up the comparison. However, this
 *          requires an additional 16 bytes per element. So, for each 1M
 *          elements, the method will require an additional 16 M bytes
 *          (approximately).
 */
public static void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len,
    final int degree, DataOutput dataOut, final Shape stockObject, final boolean fast_sort) {
  try {
    int elementCount = 0;
    // Count number of elements in the given text
    int i_start = offset;
    final Text line = new Text();
    while (i_start < offset + len) {
      int i_end = skipToEOL(element_bytes, i_start);
      // Extract the line without end of line character
      // (assumes skipToEOL returns the offset just past the EOL byte - TODO confirm)
      line.set(element_bytes, i_start, i_end - i_start - 1);
      stockObject.fromText(line);
      elementCount++;
      i_start = i_end;
    }
    LOG.info("Bulk loading an RTree with " + elementCount + " elements");

    // It turns out the findBestDegree returns the best degree when the whole
    // tree is loaded to memory when processed. However, as current algorithms
    // process the tree while it's on disk, a higher degree should be selected
    // such that a node fits one file block (assumed to be 4K).
    //final int degree = findBestDegree(bytesAvailable, elementCount);

    // Height of a complete tree of the given degree, at least 1.
    // NOTE(review): degree == 1 makes Math.log(degree) zero and (degree - 1)
    // zero below; callers presumably always pass degree >= 2 - confirm.
    int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree)));
    int leafNodeCount = (int) Math.pow(degree, height - 1);
    // Use a shallower tree when leaves would hold fewer than two elements on average
    if (elementCount < 2 * leafNodeCount && height > 1) {
      height--;
      leafNodeCount = (int) Math.pow(degree, height - 1);
    }
    // Number of nodes in a complete tree: (degree^height - 1) / (degree - 1)
    int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1));
    int nonLeafNodeCount = nodeCount - leafNodeCount;

    // Keep track of the offset of each element in the text
    final int[] offsets = new int[elementCount];
    final double[] xs = fast_sort ? new double[elementCount] : null;
    final double[] ys = fast_sort ? new double[elementCount] : null;

    i_start = offset;
    line.clear();
    for (int i = 0; i < elementCount; i++) {
      offsets[i] = i_start;
      int i_end = skipToEOL(element_bytes, i_start);
      if (xs != null) {
        // Extract the line without end of line character (same call as in
        // the counting loop above)
        line.set(element_bytes, i_start, i_end - i_start - 1);
        stockObject.fromText(line);
        // Sample center of the shape
        xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
        ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
      }
      i_start = i_end;
    }

    /**A struct to store information about a split*/
    class SplitStruct extends Rectangle {
      /**Start and end index for this split*/
      int index1, index2;
      /**Direction of this split*/
      byte direction;
      /**Index of first element on disk*/
      int offsetOfFirstElement;

      static final byte DIRECTION_X = 0;
      static final byte DIRECTION_Y = 1;

      SplitStruct(int index1, int index2, byte direction) {
        this.index1 = index1;
        this.index2 = index2;
        this.direction = direction;
      }

      /** Writes the element offset followed by the rectangle fields written by the superclass. */
      @Override
      public void write(DataOutput out) throws IOException {
        out.writeInt(offsetOfFirstElement);
        super.write(out);
      }

      /**
       * Sorts the elements between index1 and index2 along this split's
       * direction, then enqueues {@code degree} child splits that use the
       * other direction.
       */
      void partition(Queue<SplitStruct> toBePartitioned) {
        IndexedSortable sortableX;
        IndexedSortable sortableY;

        if (fast_sort) {
          // Use materialized xs[] and ys[] to do the comparisons
          sortableX = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap xs
              double tempx = xs[i];
              xs[i] = xs[j];
              xs[j] = tempx;
              // Swap ys
              double tempY = ys[i];
              ys[i] = ys[j];
              ys[j] = tempY;
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              if (xs[i] < xs[j])
                return -1;
              if (xs[i] > xs[j])
                return 1;
              return 0;
            }
          };

          sortableY = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap xs
              double tempx = xs[i];
              xs[i] = xs[j];
              xs[j] = tempx;
              // Swap ys
              double tempY = ys[i];
              ys[i] = ys[j];
              ys[j] = tempY;
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              if (ys[i] < ys[j])
                return -1;
              if (ys[i] > ys[j])
                return 1;
              return 0;
            }
          };
        } else {
          // No materialized xs and ys. Always deserialize objects to compare
          // (both comparators reuse the shared `line` buffer and stockObject)
          sortableX = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              // Get end of line
              int eol = skipToEOL(element_bytes, offsets[i]);
              line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
              stockObject.fromText(line);
              double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;

              eol = skipToEOL(element_bytes, offsets[j]);
              line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
              stockObject.fromText(line);
              double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
              if (xi < xj)
                return -1;
              if (xi > xj)
                return 1;
              return 0;
            }
          };

          sortableY = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              int eol = skipToEOL(element_bytes, offsets[i]);
              line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
              stockObject.fromText(line);
              double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;

              eol = skipToEOL(element_bytes, offsets[j]);
              line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
              stockObject.fromText(line);
              double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
              if (yi < yj)
                return -1;
              if (yi > yj)
                return 1;
              return 0;
            }
          };
        }

        final IndexedSorter sorter = new QuickSort();

        final IndexedSortable[] sortables = new IndexedSortable[2];
        sortables[SplitStruct.DIRECTION_X] = sortableX;
        sortables[SplitStruct.DIRECTION_Y] = sortableY;

        sorter.sort(sortables[direction], index1, index2);

        // Partition into maxEntries partitions (equally) and
        // create a SplitStruct for each partition
        int i1 = index1;
        for (int iSplit = 0; iSplit < degree; iSplit++) {
          int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree;
          // Children are split along the other axis: (1 - direction) flips 0 <-> 1
          SplitStruct newSplit = new SplitStruct(i1, i2, (byte) (1 - direction));
          toBePartitioned.add(newSplit);
          i1 = i2;
        }
      }
    }

    // All nodes stored in level-order traversal
    Vector<SplitStruct> nodes = new Vector<SplitStruct>();
    final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>();
    toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X));

    // Breadth-first expansion: only the first nonLeafNodeCount splits are
    // partitioned further; the rest become leaves.
    while (!toBePartitioned.isEmpty()) {
      SplitStruct split = toBePartitioned.poll();
      if (nodes.size() < nonLeafNodeCount) {
        // This is a non-leaf
        split.partition(toBePartitioned);
      }
      nodes.add(split);
    }

    if (nodes.size() != nodeCount) {
      throw new RuntimeException(
          "Expected node count: " + nodeCount + ". Real node count: " + nodes.size());
    }

    // Now we have our data sorted in the required order. Start building
    // the tree.
    // Store the offset of each leaf node in the tree
    // The stream below discards all bytes; it is only used for getPos(), to
    // learn where each leaf's first element would land. The third
    // constructor argument is presumably the starting position (header plus
    // all nodes) - TODO confirm against FSDataOutputStream.
    FSDataOutputStream fakeOut = null;
    try {
      fakeOut = new FSDataOutputStream(new java.io.OutputStream() {
        // Null output stream
        @Override
        public void write(int b) throws IOException {
          // Do nothing
        }

        @Override
        public void write(byte[] b, int off, int len) throws IOException {
          // Do nothing
        }

        @Override
        public void write(byte[] b) throws IOException {
          // Do nothing
        }
      }, null, TreeHeaderSize + nodes.size() * NodeSize);
      for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) {
        nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos();
        // Sanity check: leaves must cover the elements contiguously, in order
        if (i != nodes.elementAt(i_leaf).index1)
          throw new RuntimeException();
        double x1, y1, x2, y2;

        // Initialize MBR to first object
        int eol = skipToEOL(element_bytes, offsets[i]);
        fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
        line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
        stockObject.fromText(line);
        Rectangle mbr = stockObject.getMBR();
        x1 = mbr.x1;
        y1 = mbr.y1;
        x2 = mbr.x2;
        y2 = mbr.y2;
        i++;

        // Expand the MBR with the remaining objects of this leaf
        while (i < nodes.elementAt(i_leaf).index2) {
          eol = skipToEOL(element_bytes, offsets[i]);
          fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
          line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
          stockObject.fromText(line);
          mbr = stockObject.getMBR();
          if (mbr.x1 < x1)
            x1 = mbr.x1;
          if (mbr.y1 < y1)
            y1 = mbr.y1;
          if (mbr.x2 > x2)
            x2 = mbr.x2;
          if (mbr.y2 > y2)
            y2 = mbr.y2;
          i++;
        }
        nodes.elementAt(i_leaf).set(x1, y1, x2, y2);
      }

    } finally {
      if (fakeOut != null)
        fakeOut.close();
    }

    // Calculate MBR and offsetOfFirstElement for non-leaves
    // (iterating backwards so children are finished before their parent)
    for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) {
      // In level order, the children of node k are k * degree + 1 .. k * degree + degree
      int i_first_child = i_node * degree + 1;
      nodes.elementAt(i_node).offsetOfFirstElement = nodes.elementAt(i_first_child).offsetOfFirstElement;
      int i_child = 0;
      Rectangle mbr;
      mbr = nodes.elementAt(i_first_child + i_child);
      double x1 = mbr.x1;
      double y1 = mbr.y1;
      double x2 = mbr.x2;
      double y2 = mbr.y2;
      i_child++;

      while (i_child < degree) {
        mbr = nodes.elementAt(i_first_child + i_child);
        if (mbr.x1 < x1)
          x1 = mbr.x1;
        if (mbr.y1 < y1)
          y1 = mbr.y1;
        if (mbr.x2 > x2)
          x2 = mbr.x2;
        if (mbr.y2 > y2)
          y2 = mbr.y2;
        i_child++;
      }
      nodes.elementAt(i_node).set(x1, y1, x2, y2);
    }

    // Start writing the tree
    // write tree header (including size)
    // Total tree size. (== Total bytes written - 8 bytes for the size itself)
    dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len);
    // Tree height
    dataOut.writeInt(height);
    // Degree
    dataOut.writeInt(degree);
    // Number of elements
    dataOut.writeInt(elementCount);

    // write nodes
    for (SplitStruct node : nodes) {
      node.write(dataOut);
    }
    // write elements (in sorted order, each including its end-of-line bytes)
    for (int element_i = 0; element_i < elementCount; element_i++) {
      int eol = skipToEOL(element_bytes, offsets[element_i]);
      dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
    }

  } catch (IOException e) {
    // NOTE(review): the failure is only printed; the caller cannot tell that
    // the tree may have been written partially.
    e.printStackTrace();
  }
}
From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java
License:Open Source License
public static synchronized Geometry consumeGeometryJTS(Text text, char separator) { // Check whether this text is a Well Known Text (WKT) or a hexed string boolean wkt = false; byte[] bytes = text.getBytes(); int length = text.getLength(); Geometry geom;//from ww w . j a va 2 s . c o m int i1, i2; // Start and end offset of the geometry being parsed int i_next; // Beginning of the next field boolean isWKT = false; boolean isHex = false; if (bytes[0] == '\'' || bytes[0] == '\"') { // A quoted string. Find terminating quote and trim the quotes i1 = 1; i2 = 2; while (i2 < length && bytes[i2] != bytes[0]) i2++; if (i2 == length) throw new RuntimeException("Unterminated quoted string"); i_next = i2 + 1; i2--; // Back one step to remove the terminating quote isWKT = true; // Assume any quoted string to be WKT } else { // Not a quoted string, check if the type is WKT int i_shape = 0; while (!wkt && i_shape < ShapeNames.length) { byte[] shapeName = ShapeNames[i_shape]; if (length > shapeName.length) { int i = 0; while (i < shapeName.length && shapeName[i] == bytes[i]) i++; if (i == shapeName.length) { wkt = true; break; } } i_shape++; } if (i_shape < ShapeNames.length) { isWKT = true; // Look for the terminator of the shape text i1 = 0; i2 = 1; // Search for the first open parenthesis while (i2 < length && bytes[i2] != '(') i2++; if (i2 < length) i2++; // Skip the open parenthesis itself int nesting = 1; while (i2 < length && nesting > 0) { if (bytes[i2] == '(') nesting++; else if (bytes[i2] == ')') nesting--; i2++; } i_next = i2 + 1; } else { // Check if the type is hex-encoded WKB i1 = 0; i2 = 0; while (i2 < length && IsHex[bytes[i2]]) i2++; isHex = i2 > 1; i_next = i2; } } String geom_text = new String(bytes, i1, i2); try { if (isWKT) { geom = wktReader.read(geom_text); } else if (isHex) { byte[] binary = hexToBytes(geom_text); geom = wkbReader.read(binary); } else { geom = null; } } catch (ParseException e) { throw new RuntimeException(String.format("Error parsing 
'%s'", geom_text), e); } // Remove consumed bytes from the text if (i_next >= text.getLength()) text.clear(); else { if (bytes[i_next] == separator) i_next++; text.set(bytes, i_next, length - i_next); } return geom; }